[-]
[+]
|
Changed |
x264.spec
|
|
[-]
[+]
|
Deleted |
x264-rewrite-ffmpeg-defaults.patch
^
|
@@ -1,18 +0,0 @@
-diff -ur x264-snapshot-20110225-2245.orig/encoder/encoder.c x264-snapshot-20110225-2245/encoder/encoder.c
---- x264-snapshot-20110225-2245.orig/encoder/encoder.c 2011-02-25 22:45:04.000000000 +0100
-+++ x264-snapshot-20110225-2245/encoder/encoder.c 2011-02-26 14:24:02.144281162 +0100
-@@ -487,12 +487,8 @@
- score += h->param.analyse.inter == 0 && h->param.analyse.i_subpel_refine == 8;
- if( score >= 5 )
- {
-- x264_log( h, X264_LOG_ERROR, "broken ffmpeg default settings detected\n" );
-- x264_log( h, X264_LOG_ERROR, "use an encoding preset (e.g. -vpre medium)\n" );
-- x264_log( h, X264_LOG_ERROR, "preset usage: -vpre <speed> -vpre <profile>\n" );
-- x264_log( h, X264_LOG_ERROR, "speed presets are listed in x264 --help\n" );
-- x264_log( h, X264_LOG_ERROR, "profile is optional; x264 defaults to high\n" );
-- return -1;
-+ /* broken ffmpeg defaults, set to h264 defaults */
-+ x264_param_default( &h->param );
- }
- }
-
|
[-]
[+]
|
Changed |
x264-use-shared-library.patch
^
|
@@ -1,31 +1,15 @@
-diff -ur x264-snapshot-20110225-2245.orig/Makefile x264-snapshot-20110225-2245/Makefile
---- x264-snapshot-20110225-2245.orig/Makefile 2011-02-25 22:45:04.000000000 +0100
-+++ x264-snapshot-20110225-2245/Makefile 2011-02-26 14:25:51.568295374 +0100
-@@ -145,9 +145,10 @@
+--- Makefile.orig 2011-05-27 22:45:04.000000000 +0200
++++ Makefile 2011-05-28 15:18:29.883305471 +0200
+@@ -149,9 +149,10 @@
$(SONAME): .depend $(OBJS) $(OBJASM) $(OBJSO)
- $(CC) -shared -o $@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
+ $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
+ ln -s $(SONAME) libx264.so
--x264$(EXE): $(OBJCLI) libx264.a
-- $(CC) -o $@ $+ $(LDFLAGSCLI) $(LDFLAGS)
-+x264$(EXE): $(OBJCLI) $(SONAME)
-+ $(CC) -o $@ $(OBJCLI) -L. -lx264 $(LDFLAGSCLI) $(LDFLAGS)
+-x264$(EXE): .depend $(OBJCLI) $(CLI_LIBX264)
+- $(LD)$@ $(OBJCLI) $(CLI_LIBX264) $(LDFLAGSCLI) $(LDFLAGS)
++x264$(EXE): .depend $(OBJCLI) $(SONAME)
++ $(LD)$@ $(OBJCLI) -L. -lx264 $(LDFLAGSCLI) $(LDFLAGS)
- checkasm: tools/checkasm.o libx264.a
- $(CC) -o $@ $+ $(LDFLAGS)
-@@ -219,10 +220,12 @@
- install -d $(DESTDIR)$(libdir)/pkgconfig
- install -m 644 x264.h $(DESTDIR)$(includedir)
- install -m 644 x264_config.h $(DESTDIR)$(includedir)
-- install -m 644 libx264.a $(DESTDIR)$(libdir)
- install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
- install x264$(EXE) $(DESTDIR)$(bindir)
-- $(RANLIB) $(DESTDIR)$(libdir)/libx264.a
-+ if [ -e libx264.a ]; then \
-+ install -m 644 libx264.a $(DESTDIR)$(libdir); \
-+ $(RANLIB) $(DESTDIR)$(libdir)/libx264.a; \
-+ fi
- ifeq ($(SYS),MINGW)
- $(if $(SONAME), install -m 755 $(SONAME) $(DESTDIR)$(bindir))
- else
+ checkasm: tools/checkasm.o $(LIBX264)
+ $(LD)$@ $+ $(LDFLAGS)
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/.gitignore
^
|
@@ -5,6 +5,9 @@
*.rej
*.dll*
*.exe
+*.def
+*.lib
+*.pdb
*.mo
*.o
*.patch
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/Makefile
^
|
@@ -125,7 +125,7 @@
endif
ifneq ($(SONAME),)
-ifeq ($(SYS),MINGW)
+ifeq ($(SYS),WINDOWS)
SRCSO += x264dll.c
endif
endif
@@ -135,34 +135,38 @@
OBJSO = $(SRCSO:%.c=%.o)
DEP = depend
-.PHONY: all default fprofiled clean distclean install uninstall dox test testclean
+.PHONY: all default fprofiled clean distclean install uninstall dox test testclean lib-static lib-shared cli install-lib-dev install-lib-static install-lib-shared install-cli
-default: $(DEP) x264$(EXE)
+default: $(DEP)
-libx264.a: .depend $(OBJS) $(OBJASM)
- $(AR) rc libx264.a $(OBJS) $(OBJASM)
- $(RANLIB) libx264.a
+cli: x264$(EXE)
+lib-static: $(LIBX264)
+lib-shared: $(SONAME)
+
+$(LIBX264): .depend $(OBJS) $(OBJASM)
+ $(AR)$@ $(OBJS) $(OBJASM)
+ $(if $(RANLIB), $(RANLIB) $@)
$(SONAME): .depend $(OBJS) $(OBJASM) $(OBJSO)
- $(CC) -shared -o $@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
+ $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
-x264$(EXE): $(OBJCLI) libx264.a
- $(CC) -o $@ $+ $(LDFLAGSCLI) $(LDFLAGS)
+x264$(EXE): .depend $(OBJCLI) $(CLI_LIBX264)
+ $(LD)$@ $(OBJCLI) $(CLI_LIBX264) $(LDFLAGSCLI) $(LDFLAGS)
-checkasm: tools/checkasm.o libx264.a
- $(CC) -o $@ $+ $(LDFLAGS)
+checkasm: tools/checkasm.o $(LIBX264)
+ $(LD)$@ $+ $(LDFLAGS)
%.o: %.asm
$(AS) $(ASFLAGS) -o $@ $<
- -@ $(STRIP) -x $@ # delete local/anonymous symbols, so they don't show up in oprofile
+ -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
%.o: %.S
$(AS) $(ASFLAGS) -o $@ $<
- -@ $(STRIP) -x $@ # delete local/anonymous symbols, so they don't show up in oprofile
+ -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
.depend: config.mak
@rm -f .depend
- @$(foreach SRC, $(SRCS) $(SRCCLI) $(SRCSO), $(CC) $(CFLAGS) $(SRC) -MT $(SRC:%.c=%.o) -MM -g0 1>> .depend;)
+ @$(foreach SRC, $(SRCS) $(SRCCLI) $(SRCSO), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:%.c=%.o) $(DEPMM) 1>> .depend;)
config.mak:
./configure
@@ -191,39 +195,40 @@
else
fprofiled:
$(MAKE) clean
- mv config.mak config.mak2
- sed -e 's/CFLAGS.*/& -fprofile-generate/; s/LDFLAGS.*/& -fprofile-generate/' config.mak2 > config.mak
- $(MAKE) x264$(EXE)
+ $(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
rm -f $(SRC2:%.c=%.o)
- sed -e 's/CFLAGS.*/& -fprofile-use/; s/LDFLAGS.*/& -fprofile-use/' config.mak2 > config.mak
- $(MAKE)
- rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno)
- mv config.mak2 config.mak
+ $(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
+ rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock
endif
clean:
- rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a x264 x264.exe .depend TAGS
+ rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a *.lib *.exp *.pdb x264 x264.exe .depend TAGS
rm -f checkasm checkasm.exe tools/checkasm.o tools/checkasm-a.o
- rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno)
- - sed -e 's/ *-fprofile-\(generate\|use\)//g' config.mak > config.mak2 && mv config.mak2 config.mak
+ rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock
distclean: clean
- rm -f config.mak x264_config.h config.h config.log x264.pc
+ rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
rm -rf test/
-install: x264$(EXE) $(SONAME)
+install-cli: cli
install -d $(DESTDIR)$(bindir)
+ install x264$(EXE) $(DESTDIR)$(bindir)
+
+install-lib-dev:
install -d $(DESTDIR)$(includedir)
install -d $(DESTDIR)$(libdir)
install -d $(DESTDIR)$(libdir)/pkgconfig
install -m 644 x264.h $(DESTDIR)$(includedir)
install -m 644 x264_config.h $(DESTDIR)$(includedir)
- install -m 644 libx264.a $(DESTDIR)$(libdir)
install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
- install x264$(EXE) $(DESTDIR)$(bindir)
- $(RANLIB) $(DESTDIR)$(libdir)/libx264.a
-ifeq ($(SYS),MINGW)
+
+install-lib-static: lib-static install-lib-dev
+ install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
+ $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
+
+install-lib-shared: lib-shared install-lib-dev
+ifeq ($(SYS),WINDOWS)
$(if $(SONAME), install -m 755 $(SONAME) $(DESTDIR)$(bindir))
else
$(if $(SONAME), ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX))
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/arm/mc-a.S
^
|
@@ -106,17 +106,21 @@
vst1.64 {d0-d1}, [r3,:r3align]!
32: // n is a multiple of 32
tst r2, #32
- beq 64f
+ beq 640f
sub r2, #32
vld1.64 {d0-d3}, [r1,:r1align]!
vst1.64 {d0-d3}, [r3,:r3align]!
-64: // n is a multiple of 64
+640: // n is a multiple of 64
+ cmp r2, #0
+ beq 1f
+64:
subs r2, #64
vld1.64 {d0-d3}, [r1,:r1align]!
vld1.64 {d4-d7}, [r1,:r1align]!
vst1.64 {d0-d3}, [r3,:r3align]!
vst1.64 {d4-d7}, [r3,:r3align]!
bgt 64b
+1: // end
.if \srcalign == 8 && \dstalign == 8
vld1.64 {d0}, [r1,:64]!
vst1.64 {d0}, [r3,:64]!
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/cabac.c
^
|
@@ -708,11 +708,12 @@
{118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125}
};
-const uint8_t x264_cabac_renorm_shift[64]= {
- 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+const uint8_t x264_cabac_renorm_shift[64] =
+{
+ 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
/* -ln2(probability) */
@@ -752,24 +753,29 @@
FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000)
};
+uint8_t x264_cabac_contexts[4][QP_MAX_SPEC+1][460];
+
+void x264_cabac_init( void )
+{
+ for( int i = 0; i < 4; i++ )
+ {
+ const int8_t (*cabac_context_init)[460][2] = i == 0 ? &x264_cabac_context_init_I
+ : &x264_cabac_context_init_PB[i-1];
+ for( int qp = 0; qp <= QP_MAX_SPEC; qp++ )
+ for( int j = 0; j < 460; j++ )
+ {
+ int state = x264_clip3( (((*cabac_context_init)[j][0] * qp) >> 4) + (*cabac_context_init)[j][1], 1, 126 );
+ x264_cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
+ }
+ }
+}
/*****************************************************************************
*
*****************************************************************************/
void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
{
- const int8_t (*cabac_context_init)[460][2];
-
- if( i_slice_type == SLICE_TYPE_I )
- cabac_context_init = &x264_cabac_context_init_I;
- else
- cabac_context_init = &x264_cabac_context_init_PB[i_model];
-
- for( int i = 0; i < 460; i++ )
- {
- int state = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 );
- cb->state[i] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
- }
+ memcpy( cb->state, x264_cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], 460 );
}
void x264_cabac_encode_init_core( x264_cabac_t *cb )
@@ -846,10 +852,11 @@
x264_cabac_encode_renorm( cb );
}
+/* Note: b is negated for this function */
void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b )
{
cb->i_low <<= 1;
- cb->i_low += -b & cb->i_range;
+ cb->i_low += b & cb->i_range;
cb->i_queue += 1;
x264_cabac_putbyte( cb );
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/common.c
^
|
@@ -443,6 +443,7 @@
param->analyse.b_transform_8x8 = 0;
param->b_cabac = 0;
param->i_cqm_preset = X264_CQM_FLAT;
+ param->psz_cqm_file = NULL;
param->i_bframe = 0;
param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
if( param->b_interlaced )
@@ -460,6 +461,7 @@
{
param->analyse.b_transform_8x8 = 0;
param->i_cqm_preset = X264_CQM_FLAT;
+ param->psz_cqm_file = NULL;
}
else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
{
@@ -621,6 +623,8 @@
else
p->i_level_idc = atoi(value);
}
+ OPT("bluray-compat")
+ p->b_bluray_compat = atobool(value);
OPT("sar")
{
b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
@@ -705,14 +709,7 @@
}
}
OPT("open-gop")
- {
- b_error |= parse_enum( value, x264_open_gop_names, &p->i_open_gop );
- if( b_error )
- {
- b_error = 0;
- p->i_open_gop = atoi(value);
- }
- }
+ p->b_open_gop = atobool(value);
OPT("nf")
p->b_deblocking_filter = !atobool(value);
OPT2("filter", "deblock")
@@ -1095,7 +1092,7 @@
void *x264_malloc( int i_size )
{
uint8_t *align_buf = NULL;
-#if SYS_MACOSX || (SYS_MINGW && ARCH_X86_64)
+#if SYS_MACOSX || (SYS_WINDOWS && ARCH_X86_64)
/* Mac OS X and Win x64 always returns 16 byte aligned memory */
align_buf = malloc( i_size );
#elif HAVE_MALLOC_H
@@ -1121,7 +1118,7 @@
{
if( p )
{
-#if HAVE_MALLOC_H || SYS_MACOSX || (SYS_MINGW && ARCH_X86_64)
+#if HAVE_MALLOC_H || SYS_MACOSX || (SYS_WINDOWS && ARCH_X86_64)
free( p );
#else
free( *( ( ( void **) p ) - 1 ) );
@@ -1160,7 +1157,7 @@
char *x264_slurp_file( const char *filename )
{
int b_error = 0;
- int i_size;
+ size_t i_size;
char *buf;
FILE *fh = fopen( filename, "rb" );
if( !fh )
@@ -1240,6 +1237,7 @@
s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
s += sprintf( s, " interlaced=%s", p->b_interlaced ? p->b_tff ? "tff" : "bff" : p->b_fake_interlaced ? "fake" : "0" );
+ s += sprintf( s, " bluray_compat=%d", p->b_bluray_compat );
s += sprintf( s, " constrained_intra=%d", p->b_constrained_intra );
@@ -1248,7 +1246,7 @@
{
s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
- p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->i_open_gop );
+ p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop );
}
s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/common.h
^
|
@@ -92,6 +92,16 @@
#include <assert.h>
#include <limits.h>
+#if HAVE_INTERLACED
+# define MB_INTERLACED h->mb.b_interlaced
+# define SLICE_MBAFF h->sh.b_mbaff
+# define PARAM_INTERLACED h->param.b_interlaced
+#else
+# define MB_INTERLACED 0
+# define SLICE_MBAFF 0
+# define PARAM_INTERLACED 0
+#endif
+
/* Unions for type-punning.
* Mn: load or store n bits, aligned, native-endian
* CPn: copy n bits, aligned, native-endian
@@ -137,7 +147,7 @@
#define X264_SCAN8_LUMA_SIZE (5*8)
#define X264_SCAN8_0 (4+1*8)
-static const int x264_scan8[16+2*4+3] =
+static const unsigned x264_scan8[16+2*4+3] =
{
/* Luma */
4+1*8, 5+1*8, 4+2*8, 5+2*8,
@@ -205,7 +215,8 @@
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
-void x264_init_vlc_tables( void );
+void x264_cavlc_init( void );
+void x264_cabac_init( void );
static ALWAYS_INLINE pixel x264_clip_pixel( int x )
{
@@ -310,6 +321,7 @@
SEI_USER_DATA_REGISTERED = 4,
SEI_USER_DATA_UNREGISTERED = 5,
SEI_RECOVERY_POINT = 6,
+ SEI_DEC_REF_PIC_MARKING = 7,
SEI_FRAME_PACKING = 45,
};
@@ -392,6 +404,15 @@
typedef struct x264_ratecontrol_t x264_ratecontrol_t;
+typedef struct x264_left_table_t
+{
+ uint8_t intra[4];
+ uint8_t nnz[4];
+ uint8_t nnz_chroma[4];
+ uint8_t mv[4];
+ uint8_t ref[4];
+} x264_left_table_t;
+
struct x264_t
{
/* encoder parameters */
@@ -473,6 +494,10 @@
/* Slice header */
x264_slice_header_t sh;
+ /* Slice header backup, for SEI_DEC_REF_PIC_MARKING */
+ int b_sh_backup;
+ x264_slice_header_t sh_backup;
+
/* cabac context */
x264_cabac_t cabac;
@@ -549,6 +574,8 @@
int i_mb_stride;
int i_b8_stride;
int i_b4_stride;
+ int left_b8[2];
+ int left_b4[2];
/* Current index */
int i_mb_x;
@@ -568,17 +595,24 @@
int i_psy_trellis; /* Psy trellis strength--fixed point value*/
int b_interlaced;
+ int b_adaptive_mbaff; /* MBAFF+subme 0 requires non-adaptive MBAFF i.e. all field mbs */
/* Allowed qpel MV range to stay within the picture + emulated edge pixels */
int mv_min[2];
int mv_max[2];
+ int mv_miny_row[3]; /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
+ int mv_maxy_row[3];
/* Subpel MV range for motion search.
* same mv_min/max but includes levels' i_mv_range. */
int mv_min_spel[2];
int mv_max_spel[2];
+ int mv_miny_spel_row[3];
+ int mv_maxy_spel_row[3];
/* Fullpel MV range for motion search */
int mv_min_fpel[2];
int mv_max_fpel[2];
+ int mv_miny_fpel_row[3];
+ int mv_maxy_fpel_row[3];
/* neighboring MBs */
unsigned int i_neighbour;
@@ -587,14 +621,22 @@
unsigned int i_neighbour_intra; /* for constrained intra pred */
unsigned int i_neighbour_frame; /* ignoring slice boundaries */
int i_mb_type_top;
- int i_mb_type_left;
+ int i_mb_type_left[2];
int i_mb_type_topleft;
int i_mb_type_topright;
int i_mb_prev_xy;
- int i_mb_left_xy;
+ int i_mb_left_xy[2];
int i_mb_top_xy;
int i_mb_topleft_xy;
int i_mb_topright_xy;
+ int i_mb_top_y;
+ int i_mb_topleft_y;
+ int i_mb_topright_y;
+ const x264_left_table_t *left_index_table;
+ int i_mb_top_mbpair_xy;
+ int topleft_partition;
+ int b_allow_skip;
+ int field_decoding_flag;
/**** thread synchronization ends here ****/
/* subsequent variables are either thread-local or constant,
@@ -617,6 +659,7 @@
int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
uint16_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of
* NOTE: this will fail on resolutions above 2^16 MBs... */
+ uint8_t *field;
/* buffer for weighted versions of the reference frames */
pixel *p_weight_buf[X264_REF_MAX];
@@ -645,6 +688,7 @@
int b_reencode_mb;
int ip_offset; /* Used by PIR to offset the quantizer of intra-refresh blocks. */
int b_deblock_rdo;
+ int b_overflow; /* If CAVLC had a level code overflow during bitstream writing. */
struct
{
@@ -716,11 +760,15 @@
/* number of neighbors (top and left) that used 8x8 dct */
int i_neighbour_transform_size;
- int i_neighbour_interlaced;
+ int i_neighbour_skip;
/* neighbor CBPs */
int i_cbp_top;
int i_cbp_left;
+
+ /* extra data required for mbaff in mv prediction */
+ int16_t topright_mv[2][3][2];
+ int8_t topright_ref[2][3];
} cache;
/* */
@@ -739,9 +787,9 @@
int i_chroma_lambda2_offset;
/* B_direct and weighted prediction */
- int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
+ int16_t dist_scale_factor_buf[2][2][X264_REF_MAX*2][4];
int16_t (*dist_scale_factor)[4];
- int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
+ int8_t bipred_weight_buf[2][2][X264_REF_MAX*2][4];
int8_t (*bipred_weight)[4];
/* maps fref1[0]'s ref indices into the current list0 */
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
@@ -776,6 +824,7 @@
int i_mb_partition[17];
int i_mb_cbp[6];
int i_mb_pred_mode[4][13];
+ int i_mb_field[3];
/* Adaptive direct mv pred */
int i_direct_score[2];
/* Metrics */
@@ -805,6 +854,7 @@
int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
int64_t i_mb_cbp[6];
int64_t i_mb_pred_mode[4][13];
+ int64_t i_mb_field[3];
/* */
int i_direct_score[2];
int i_direct_frames[2];
@@ -824,8 +874,10 @@
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
- pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- uint8_t (*deblock_strength[2])[2][4][4];
+ pixel *intra_border_backup[5][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
+ /* Deblock strength values are stored for each 4x4 partition. In MBAFF
+ * there are four extra values that need to be stored, located in [4][i]. */
+ uint8_t (*deblock_strength[2])[2][8][4];
/* CPU functions dependents */
x264_predict_t predict_16x16[4+3];
@@ -838,6 +890,8 @@
x264_mc_functions_t mc;
x264_dct_function_t dctf;
x264_zigzag_function_t zigzagf;
+ x264_zigzag_function_t zigzagf_interlaced;
+ x264_zigzag_function_t zigzagf_progressive;
x264_quant_function_t quantf;
x264_deblock_function_t loopf;
x264_bitstream_function_t bsf;
@@ -850,11 +904,12 @@
// included at the end because it needs x264_t
#include "macroblock.h"
-#include "rectangle.h"
-#if HAVE_MMX
+#if ARCH_X86 || ARCH_X86_64
#include "x86/util.h"
#endif
+#include "rectangle.h"
+
#endif
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/cpu.c
^
|
@@ -45,7 +45,8 @@
#include <machine/cpu.h>
#endif
-const x264_cpu_name_t x264_cpu_names[] = {
+const x264_cpu_name_t x264_cpu_names[] =
+{
{"Altivec", X264_CPU_ALTIVEC},
// {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore
{"MMX2", X264_CPU_MMX|X264_CPU_MMXEXT},
@@ -357,9 +358,12 @@
#if !HAVE_THREAD
return 1;
-#elif defined(_WIN32)
+#elif SYS_WINDOWS
return x264_pthread_num_processors_np();
+#elif SYS_CYGWIN
+ return sysconf( _SC_NPROCESSORS_ONLN );
+
#elif SYS_LINUX
unsigned int bit;
int np;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/cpu.h
^
|
@@ -31,7 +31,16 @@
void x264_cpu_emms( void );
void x264_cpu_sfence( void );
#if HAVE_MMX
+/* There is no way to forbid the compiler from using float instructions
+ * before the emms so miscompilation could theoretically occur in the
+ * unlikely event that the compiler reorders emms and float instructions. */
+#if HAVE_X86_INLINE_ASM
+/* Clobbering memory makes the compiler less likely to reorder code. */
+#define x264_emms() asm volatile( "emms":::"memory","st","st(1)","st(2)", \
+ "st(3)","st(4)","st(5)","st(6)","st(7)" )
+#else
#define x264_emms() x264_cpu_emms()
+#endif
#else
#define x264_emms()
#endif
@@ -53,9 +62,10 @@
#define x264_stack_align(func,...) func(__VA_ARGS__)
#endif
-typedef struct {
+typedef struct
+{
const char name[16];
- int flags;
+ uint32_t flags;
} x264_cpu_name_t;
extern const x264_cpu_name_t x264_cpu_names[];
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/dct.c
^
|
@@ -746,123 +746,117 @@
}
}
-void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
+void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced )
{
- if( b_interlaced )
- {
- pf->scan_8x8 = zigzag_scan_8x8_field;
- pf->scan_4x4 = zigzag_scan_4x4_field;
- pf->sub_8x8 = zigzag_sub_8x8_field;
- pf->sub_4x4 = zigzag_sub_4x4_field;
- pf->sub_4x4ac = zigzag_sub_4x4ac_field;
+ pf_interlaced->scan_8x8 = zigzag_scan_8x8_field;
+ pf_progressive->scan_8x8 = zigzag_scan_8x8_frame;
+ pf_interlaced->scan_4x4 = zigzag_scan_4x4_field;
+ pf_progressive->scan_4x4 = zigzag_scan_4x4_frame;
+ pf_interlaced->sub_8x8 = zigzag_sub_8x8_field;
+ pf_progressive->sub_8x8 = zigzag_sub_8x8_frame;
+ pf_interlaced->sub_4x4 = zigzag_sub_4x4_field;
+ pf_progressive->sub_4x4 = zigzag_sub_4x4_frame;
+ pf_interlaced->sub_4x4ac = zigzag_sub_4x4ac_field;
+ pf_progressive->sub_4x4ac = zigzag_sub_4x4ac_frame;
+
#if HIGH_BIT_DEPTH
#if HAVE_MMX
- if( cpu&X264_CPU_SSE2 )
- pf->scan_4x4 = x264_zigzag_scan_4x4_field_sse2;
- if( cpu&X264_CPU_SSE4 )
- pf->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
- if( cpu&X264_CPU_AVX )
- pf->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
-#endif // HAVE_MMX
-#else
-#if HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
- pf->scan_8x8 = x264_zigzag_scan_8x8_field_mmxext;
- }
- if( cpu&X264_CPU_SSSE3 )
- {
- pf->sub_4x4 = x264_zigzag_sub_4x4_field_ssse3;
- pf->sub_4x4ac= x264_zigzag_sub_4x4ac_field_ssse3;
- }
- if( cpu&X264_CPU_AVX )
- {
- pf->sub_4x4 = x264_zigzag_sub_4x4_field_avx;
-#if ARCH_X86_64
- pf->sub_4x4ac= x264_zigzag_sub_4x4ac_field_avx;
-#endif
- }
-#endif // HAVE_MMX
-#if HAVE_ALTIVEC
- if( cpu&X264_CPU_ALTIVEC )
- pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
-#endif
-#endif // HIGH_BIT_DEPTH
- }
- else
+ if( cpu&X264_CPU_SSE2 )
{
- pf->scan_8x8 = zigzag_scan_8x8_frame;
- pf->scan_4x4 = zigzag_scan_4x4_frame;
- pf->sub_8x8 = zigzag_sub_8x8_frame;
- pf->sub_4x4 = zigzag_sub_4x4_frame;
- pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
-#if HIGH_BIT_DEPTH
-#if HAVE_MMX
- if( cpu&X264_CPU_SSE2 )
- {
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
- pf->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
- }
+ pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_sse2;
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
+ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
+ }
+ if( cpu&X264_CPU_SSE4 )
+ pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
+ if( cpu&X264_CPU_AVX )
+ pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_avx;
#if ARCH_X86_64
- if( cpu&X264_CPU_AVX )
- {
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
- pf->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
- }
+ if( cpu&X264_CPU_AVX )
+ {
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
+ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx;
+ }
#endif // ARCH_X86_64
#endif // HAVE_MMX
#else
#if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
- if( cpu&X264_CPU_MMXEXT )
- pf->scan_8x8 = x264_zigzag_scan_8x8_frame_mmxext;
- if( cpu&X264_CPU_SSE2_IS_FAST )
- pf->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
- if( cpu&X264_CPU_SSSE3 )
- {
- pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
- pf->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_ssse3;
- pf->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
- if( cpu&X264_CPU_SHUFFLE_IS_FAST )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
- }
- if( cpu&X264_CPU_AVX )
- {
- pf->sub_4x4 = x264_zigzag_sub_4x4_frame_avx;
+ if( cpu&X264_CPU_MMX )
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
+ if( cpu&X264_CPU_MMXEXT )
+ {
+ pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
+ pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_mmxext;
+ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_mmxext;
+ }
+ if( cpu&X264_CPU_SSE2_IS_FAST )
+ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
+ if( cpu&X264_CPU_SSSE3 )
+ {
+ pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_ssse3;
+ pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
+ pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_ssse3;
+ pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_ssse3;
+ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_ssse3;
+ if( cpu&X264_CPU_SHUFFLE_IS_FAST )
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_ssse3;
+ }
+ if( cpu&X264_CPU_AVX )
+ {
+ pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_avx;
+ pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_avx;
#if ARCH_X86_64
- pf->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
+ pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_avx;
+ pf_progressive->sub_4x4ac= x264_zigzag_sub_4x4ac_frame_avx;
#endif
- if( cpu&X264_CPU_SHUFFLE_IS_FAST )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
- }
+ if( cpu&X264_CPU_SHUFFLE_IS_FAST )
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx;
+ }
#endif // HAVE_MMX
#if HAVE_ALTIVEC
- if( cpu&X264_CPU_ALTIVEC )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
+ if( cpu&X264_CPU_ALTIVEC )
+ {
+ pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
+ }
#endif
#if HAVE_ARMV6
- if( cpu&X264_CPU_NEON )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
+ if( cpu&X264_CPU_NEON )
+ pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
#endif
#endif // HIGH_BIT_DEPTH
- }
- pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
+ pf_interlaced->interleave_8x8_cavlc =
+ pf_progressive->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
#if HAVE_MMX
#if HIGH_BIT_DEPTH
if( cpu&X264_CPU_SSE2 )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
+ {
+ pf_interlaced->interleave_8x8_cavlc =
+ pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
+ }
if( cpu&X264_CPU_AVX )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
+ {
+ pf_interlaced->interleave_8x8_cavlc =
+ pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
+ }
#else
if( cpu&X264_CPU_MMX )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
+ {
+ pf_interlaced->interleave_8x8_cavlc =
+ pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
+ }
if( cpu&X264_CPU_SHUFFLE_IS_FAST )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
+ {
+ pf_interlaced->interleave_8x8_cavlc =
+ pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
+ }
+
if( cpu&X264_CPU_AVX )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
+ {
+ pf_interlaced->interleave_8x8_cavlc =
+ pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_avx;
+ }
#endif // HIGH_BIT_DEPTH
#endif
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/dct.h
^
|
@@ -132,6 +132,6 @@
void x264_dct_init( int cpu, x264_dct_function_t *dctf );
void x264_dct_init_weights( void );
-void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced );
+void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced );
#endif
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/deblock.c
^
|
@@ -75,6 +75,37 @@
#define tc0_table(x) i_tc0_table[(x)+24]
/* From ffmpeg */
+static ALWAYS_INLINE void deblock_edge_luma_c( pixel *pix, int xstride, int alpha, int beta, int8_t tc0 )
+{
+ int p2 = pix[-3*xstride];
+ int p1 = pix[-2*xstride];
+ int p0 = pix[-1*xstride];
+ int q0 = pix[ 0*xstride];
+ int q1 = pix[ 1*xstride];
+ int q2 = pix[ 2*xstride];
+
+ if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
+ {
+ int tc = tc0;
+ int delta;
+ if( abs( p2 - p0 ) < beta )
+ {
+ if( tc0 )
+ pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0, tc0 );
+ tc++;
+ }
+ if( abs( q2 - q0 ) < beta )
+ {
+ if( tc0 )
+ pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0, tc0 );
+ tc++;
+ }
+
+ delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+ pix[-1*xstride] = x264_clip_pixel( p0 + delta ); /* p0' */
+ pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
+ }
+}
static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
for( int i = 0; i < 4; i++ )
@@ -84,40 +115,15 @@
pix += 4*ystride;
continue;
}
- for( int d = 0; d < 4; d++ )
- {
- int p2 = pix[-3*xstride];
- int p1 = pix[-2*xstride];
- int p0 = pix[-1*xstride];
- int q0 = pix[ 0*xstride];
- int q1 = pix[ 1*xstride];
- int q2 = pix[ 2*xstride];
-
- if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- {
- int tc = tc0[i];
- int delta;
- if( abs( p2 - p0 ) < beta )
- {
- if( tc0[i] )
- pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
- tc++;
- }
- if( abs( q2 - q0 ) < beta )
- {
- if( tc0[i] )
- pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
- tc++;
- }
-
- delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
- pix[-1*xstride] = x264_clip_pixel( p0 + delta ); /* p0' */
- pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
- }
- pix += ystride;
- }
+ for( int d = 0; d < 4; d++, pix += ystride )
+ deblock_edge_luma_c( pix, xstride, alpha, beta, tc0[i] );
}
}
+static inline void deblock_v_luma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+{
+ for( int d = 0; d < 8; d++, pix += stride )
+ deblock_edge_luma_c( pix, 1, alpha, beta, tc0[d>>1] );
+}
static void deblock_v_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
@@ -127,6 +133,20 @@
deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
}
+static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, int xstride, int alpha, int beta, int8_t tc )
+{
+ int p1 = pix[-2*xstride];
+ int p0 = pix[-1*xstride];
+ int q0 = pix[ 0*xstride];
+ int q1 = pix[ 1*xstride];
+
+ if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
+ {
+ int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+ pix[-1*xstride] = x264_clip_pixel( p0 + delta ); /* p0' */
+ pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
+ }
+}
static inline void deblock_chroma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
for( int i = 0; i < 4; i++ )
@@ -139,21 +159,14 @@
}
for( int d = 0; d < 2; d++, pix += ystride-2 )
for( int e = 0; e < 2; e++, pix++ )
- {
- int p1 = pix[-2*xstride];
- int p0 = pix[-1*xstride];
- int q0 = pix[ 0*xstride];
- int q1 = pix[ 1*xstride];
-
- if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- {
- int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
- pix[-1*xstride] = x264_clip_pixel( p0 + delta ); /* p0' */
- pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
- }
- }
+ deblock_edge_chroma_c( pix, xstride, alpha, beta, tc0[i] );
}
}
+static inline void deblock_v_chroma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+{
+ for( int i = 0; i < 4; i++, pix += stride )
+ deblock_edge_chroma_c( pix, 2, alpha, beta, tc0[i] );
+}
static void deblock_v_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
deblock_chroma_c( pix, stride, 2, alpha, beta, tc0 );
@@ -163,49 +176,55 @@
deblock_chroma_c( pix, 2, stride, alpha, beta, tc0 );
}
-static inline void deblock_luma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta )
+static ALWAYS_INLINE void deblock_edge_luma_intra_c( pixel *pix, int xstride, int alpha, int beta )
{
- for( int d = 0; d < 16; d++ )
- {
- int p2 = pix[-3*xstride];
- int p1 = pix[-2*xstride];
- int p0 = pix[-1*xstride];
- int q0 = pix[ 0*xstride];
- int q1 = pix[ 1*xstride];
- int q2 = pix[ 2*xstride];
+ int p2 = pix[-3*xstride];
+ int p1 = pix[-2*xstride];
+ int p0 = pix[-1*xstride];
+ int q0 = pix[ 0*xstride];
+ int q1 = pix[ 1*xstride];
+ int q2 = pix[ 2*xstride];
- if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
+ if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
+ {
+ if( abs( p0 - q0 ) < ((alpha >> 2) + 2) )
{
- if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
+ if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
{
- if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
- {
- const int p3 = pix[-4*xstride];
- pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
- pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
- pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
- }
- else /* p0' */
- pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
- {
- const int q3 = pix[3*xstride];
- pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
- pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
- pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
- }
- else /* q0' */
- pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ const int p3 = pix[-4*xstride];
+ pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+ pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+ pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
}
- else /* p0', q0' */
- {
+ else /* p0' */
pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
+ {
+ const int q3 = pix[3*xstride];
+ pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+ pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+ pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
}
+ else /* q0' */
+ pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ }
+ else /* p0', q0' */
+ {
+ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+ pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
}
- pix += ystride;
}
}
+static inline void deblock_luma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta )
+{
+ for( int d = 0; d < 16; d++, pix += ystride )
+ deblock_edge_luma_intra_c( pix, xstride, alpha, beta );
+}
+static inline void deblock_v_luma_intra_mbaff_c( pixel *pix, int ystride, int alpha, int beta )
+{
+ for( int d = 0; d < 8; d++, pix += ystride )
+ deblock_edge_luma_intra_c( pix, 1, alpha, beta );
+}
static void deblock_v_luma_intra_c( pixel *pix, int stride, int alpha, int beta )
{
deblock_luma_intra_c( pix, stride, 1, alpha, beta );
@@ -215,22 +234,29 @@
deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}
+static ALWAYS_INLINE void deblock_edge_chroma_intra_c( pixel *pix, int xstride, int alpha, int beta )
+{
+ int p1 = pix[-2*xstride];
+ int p0 = pix[-1*xstride];
+ int q0 = pix[ 0*xstride];
+ int q1 = pix[ 1*xstride];
+
+ if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
+ {
+ pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
+ pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
+ }
+}
static inline void deblock_chroma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int dir )
{
for( int d = 0; d < (dir?16:8); d++, pix += ystride-2 )
for( int e = 0; e < (dir?1:2); e++, pix++ )
- {
- int p1 = pix[-2*xstride];
- int p0 = pix[-1*xstride];
- int q0 = pix[ 0*xstride];
- int q1 = pix[ 1*xstride];
-
- if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- {
- pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
- pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
- }
- }
+ deblock_edge_chroma_intra_c( pix, xstride, alpha, beta );
+}
+static inline void deblock_v_chroma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
+{
+ for( int i = 0; i < 4; i++, pix += stride )
+ deblock_edge_chroma_intra_c( pix, 2, alpha, beta );
}
static void deblock_v_chroma_intra_c( pixel *pix, int stride, int alpha, int beta )
{
@@ -242,8 +268,8 @@
}
static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
- int bframe )
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
+ int bframe, x264_t *h )
{
for( int dir = 0; dir < 2; dir++ )
{
@@ -270,6 +296,162 @@
}
}
+void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+ int mvy_limit, int bframe, x264_t *h )
+{
+ int neighbour_field[2];
+ neighbour_field[0] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]];
+ neighbour_field[1] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy];
+ int intra_cur = IS_INTRA( h->mb.i_type );
+
+ if( !intra_cur )
+ {
+ for( int dir = 0; dir < 2; dir++ )
+ {
+ int edge_stride = dir ? 8 : 1;
+ int part_stride = dir ? 1 : 8;
+ for( int edge = 0; edge < 4; edge++ )
+ {
+ for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride )
+ {
+ int p = q - edge_stride;
+ if( nnz_cache[q] || nnz_cache[p] )
+ {
+ bs[dir][edge][i] = 2;
+ }
+ else if( (edge == 0 && MB_INTERLACED != neighbour_field[dir]) ||
+ ref[0][q] != ref[0][p] ||
+ abs( mv[0][q][0] - mv[0][p][0] ) >= 4 ||
+ abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit ||
+ (bframe && (ref[1][q] != ref[1][p] ||
+ abs( mv[1][q][0] - mv[1][p][0] ) >= 4 ||
+ abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) )
+ {
+ bs[dir][edge][i] = 1;
+ }
+ else
+ bs[dir][edge][i] = 0;
+ }
+ }
+ }
+ }
+
+ if( h->mb.i_neighbour & MB_LEFT )
+ {
+ if( h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
+ {
+ static const uint8_t offset[2][2][8] = {
+ { { 0, 0, 0, 0, 1, 1, 1, 1 },
+ { 2, 2, 2, 2, 3, 3, 3, 3 }, },
+ { { 0, 1, 2, 3, 0, 1, 2, 3 },
+ { 0, 1, 2, 3, 0, 1, 2, 3 }, }
+ };
+ uint8_t bS[8];
+
+ if( intra_cur )
+ memset( bS, 4, 8 );
+ else
+ {
+ const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
+ uint8_t (*nnz)[24] = h->mb.non_zero_count;
+
+ for( int i = 0; i < 8; i++ )
+ {
+ int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1];
+ int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
+ int nnz_left = nnz[left][3 + 4*off[i]];
+ if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
+ {
+ int j = off[i]&~1;
+ if( h->mb.mb_transform_size[left] )
+ nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
+ }
+ if( IS_INTRA( h->mb.type[left] ) )
+ bS[i] = 4;
+ else if( nnz_left || nnz_this )
+ bS[i] = 2;
+ else // As left is different interlaced.
+ bS[i] = 1;
+ }
+ }
+
+ if( MB_INTERLACED )
+ {
+ for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[i];
+ for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[4+i];
+ }
+ else
+ {
+ for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[2*i];
+ for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[1+2*i];
+ }
+ }
+ }
+
+ if( h->mb.i_neighbour & MB_TOP )
+ {
+ if( !(h->mb.i_mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
+ {
+ /* Need to filter both fields (even for frame macroblocks).
+ * Filter top two rows using the top macroblock of the above
+ * pair and then the bottom one. */
+ int mbn_xy = h->mb.i_mb_xy - 2 * h->mb.i_mb_stride;
+ uint32_t nnz_cur[4];
+ nnz_cur[0] = h->mb.cache.non_zero_count[x264_scan8[0]+0];
+ nnz_cur[1] = h->mb.cache.non_zero_count[x264_scan8[0]+1];
+ nnz_cur[2] = h->mb.cache.non_zero_count[x264_scan8[0]+2];
+ nnz_cur[3] = h->mb.cache.non_zero_count[x264_scan8[0]+3];
+ /* Munge NNZ for cavlc + 8x8dct */
+ if( !h->param.b_cabac && h->pps->b_transform_8x8_mode &&
+ h->mb.mb_transform_size[h->mb.i_mb_xy] )
+ {
+ int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
+ int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
+ nnz_cur[0] = nnz_cur[1] = !!nnz0;
+ nnz_cur[2] = nnz_cur[3] = !!nnz1;
+ }
+
+ for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
+ {
+ int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] );
+ uint8_t (*nnz)[24] = h->mb.non_zero_count;
+
+ uint32_t nnz_top[4];
+ nnz_top[0] = nnz[mbn_xy][3*4+0];
+ nnz_top[1] = nnz[mbn_xy][3*4+1];
+ nnz_top[2] = nnz[mbn_xy][3*4+2];
+ nnz_top[3] = nnz[mbn_xy][3*4+3];
+
+ if( !h->param.b_cabac && h->pps->b_transform_8x8_mode &&
+ (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[mbn_xy] )
+ {
+ int nnz_top0 = M16( &nnz[mbn_xy][8] ) | M16( &nnz[mbn_xy][12] );
+ int nnz_top1 = M16( &nnz[mbn_xy][10] ) | M16( &nnz[mbn_xy][14] );
+ nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0;
+ nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
+ }
+
+ uint8_t bS[4];
+ if( intra_cur || mbn_intra )
+ M32( bS ) = 0x03030303;
+ else
+ {
+ for( int i = 0; i < 4; i++ )
+ {
+ if( nnz_cur[i] || nnz_top[i] )
+ bS[i] = 2;
+ else
+ bS[i] = 1;
+ }
+ }
+ for( int i = 0; i < 4; i++ )
+ bs[1][4*j][i] = bS[i];
+ }
+ }
+ }
+}
+
static inline void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
{
int index_a = i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset;
@@ -304,12 +486,10 @@
void x264_frame_deblock_row( x264_t *h, int mb_y )
{
- int b_interlaced = h->sh.b_mbaff;
+ int b_interlaced = SLICE_MBAFF;
int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset );
int stridey = h->fdec->i_stride[0];
- int stride2y = stridey << b_interlaced;
int strideuv = h->fdec->i_stride[1];
- int stride2uv = strideuv << b_interlaced;
for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
{
@@ -319,16 +499,18 @@
int mb_xy = h->mb.i_mb_xy;
int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
- uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&1][mb_x];
+ uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][mb_x];
pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
- if( mb_y & b_interlaced )
+ if( mb_y & MB_INTERLACED )
{
pixy -= 15*stridey;
pixuv -= 7*strideuv;
}
+ int stride2y = stridey << MB_INTERLACED;
+ int stride2uv = strideuv << MB_INTERLACED;
int qp = h->mb.qp[mb_xy];
int qpc = h->chroma_qp_table[qp];
int first_edge_only = h->mb.type[mb_xy] == P_SKIP || qp <= qp_thresh;
@@ -347,16 +529,59 @@
if( h->mb.i_neighbour & MB_LEFT )
{
- int qpl = h->mb.qp[h->mb.i_mb_left_xy];
- int qp_left = (qp + qpl + 1) >> 1;
- int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
- int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy] );
- if( intra_cur || intra_left )
- FILTER( _intra, 0, 0, qp_left, qpc_left );
+ if( b_interlaced && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
+ {
+ int luma_qp[2];
+ int chroma_qp[2];
+ int left_qp[2];
+ int current_qp = h->mb.qp[mb_xy];
+ left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
+ luma_qp[0] = (current_qp + left_qp[0] + 1) >> 1;
+ chroma_qp[0] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
+ if( bs[0][0][0] == 4)
+ {
+ deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], 0, deblock_v_luma_intra_mbaff_c );
+ deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
+ deblock_edge_intra( h, pixuv + 1, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
+ }
+ else
+ {
+ deblock_edge( h, pixy, 2*stridey, bs[0][0], luma_qp[0], 0, deblock_v_luma_mbaff_c );
+ deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
+ deblock_edge( h, pixuv + 1, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
+ }
+
+ int offy = MB_INTERLACED ? 4 : 0;
+ int offuv = MB_INTERLACED ? 3 : 0;
+ left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
+ luma_qp[1] = (current_qp + left_qp[1] + 1) >> 1;
+ chroma_qp[1] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
+ if( bs[0][4][0] == 4)
+ {
+ deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], 0, deblock_v_luma_intra_mbaff_c );
+ deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
+ deblock_edge_intra( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
+ }
+ else
+ {
+ deblock_edge( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], 0, deblock_v_luma_mbaff_c );
+ deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
+ deblock_edge( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
+ }
+ }
else
- FILTER( , 0, 0, qp_left, qpc_left );
- }
+ {
+ int qpl = h->mb.qp[h->mb.i_mb_xy-1];
+ int qp_left = (qp + qpl + 1) >> 1;
+ int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
+ int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_xy-1] );
+ if( intra_cur || intra_left )
+ FILTER( _intra, 0, 0, qp_left, qpc_left );
+ else
+ FILTER( , 0, 0, qp_left, qpc_left );
+ }
+ }
if( !first_edge_only )
{
if( !transform_8x8 ) FILTER( , 0, 1, qp, qpc );
@@ -366,17 +591,39 @@
if( h->mb.i_neighbour & MB_TOP )
{
- int qpt = h->mb.qp[h->mb.i_mb_top_xy];
- int qp_top = (qp + qpt + 1) >> 1;
- int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
- int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
- if( ~b_interlaced & (intra_cur | intra_top) )
- FILTER( _intra, 1, 0, qp_top, qpc_top );
+ if( b_interlaced && !(mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
+ {
+ int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
+
+ for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
+ {
+ int qpt = h->mb.qp[mbn_xy];
+ int qp_top = (qp + qpt + 1) >> 1;
+ int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
+
+ // deblock the first horizontal edge of the even rows, then the first horizontal edge of the odd rows
+ deblock_edge( h, pixy + j*stridey, 2* stridey, bs[1][4*j], qp_top, 0, deblock_v_luma_c );
+ deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, 1, deblock_v_chroma_c );
+ }
+ }
else
{
- if( intra_top )
- M32( bs[1][0] ) = 0x03030303;
- FILTER( , 1, 0, qp_top, qpc_top );
+ int qpt = h->mb.qp[h->mb.i_mb_top_xy];
+ int qp_top = (qp + qpt + 1) >> 1;
+ int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
+ int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
+
+ if( (!b_interlaced || (!MB_INTERLACED && !h->mb.field[h->mb.i_mb_top_xy]))
+ && (intra_cur || intra_top) )
+ {
+ FILTER( _intra, 1, 0, qp_top, qpc_top );
+ }
+ else
+ {
+ if( intra_top )
+ M32( bs[1][0] ) = 0x03030303;
+ FILTER( , 1, 0, qp_top, qpc_top );
+ }
}
}
@@ -401,17 +648,17 @@
*/
void x264_macroblock_deblock( x264_t *h )
{
- int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset );
+ int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
int qp = h->mb.i_qp;
if( qp <= qp_thresh || h->mb.i_type == P_SKIP )
return;
- uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
+ uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
if( IS_INTRA( h->mb.i_type ) )
- memset( bs, 3, 2*4*4*sizeof(uint8_t) );
+ memset( bs, 3, 2*8*4*sizeof(uint8_t) );
else
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- bs, 4 >> h->sh.b_mbaff, h->sh.i_type == SLICE_TYPE_B );
+ bs, 4 >> SLICE_MBAFF, h->sh.i_type == SLICE_TYPE_B, h );
int transform_8x8 = h->mb.b_transform_8x8;
pixel *fdec = h->mb.pic.p_fdec[0];
@@ -453,17 +700,17 @@
void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
- int mvy_limit, int bframe );
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+ int mvy_limit, int bframe, x264_t *h );
void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
- int mvy_limit, int bframe );
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+ int mvy_limit, int bframe, x264_t *h );
void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
- int mvy_limit, int bframe );
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+ int mvy_limit, int bframe, x264_t *h );
void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
- int mvy_limit, int bframe );
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+ int mvy_limit, int bframe, x264_t *h );
#if ARCH_X86
void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
@@ -505,7 +752,7 @@
void x264_deblock_h_chroma_neon( uint8_t *, int, int, int, int8_t * );
#endif
-void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
+void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
{
pf->deblock_luma[1] = deblock_v_luma_c;
pf->deblock_luma[0] = deblock_h_luma_c;
@@ -585,4 +832,6 @@
}
#endif
#endif // !HIGH_BIT_DEPTH
+
+ if( b_mbaff ) pf->deblock_strength = deblock_strength_mbaff_c;
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/frame.c
^
|
@@ -48,7 +48,7 @@
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines;
- int i_padv = PADV << h->param.b_interlaced;
+ int i_padv = PADV << PARAM_INTERLACED;
int luma_plane_size, chroma_plane_size;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
@@ -100,20 +100,35 @@
CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * i_padv/2 + PADH;
+ if( PARAM_INTERLACED )
+ {
+ CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
+ frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * i_padv/2 + PADH;
+ }
/* all 4 luma planes allocated together, since the cacheline split code
* requires them to be in-phase wrt cacheline alignment. */
if( h->param.analyse.i_subpel_refine && b_fdec )
{
+ /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size * sizeof(pixel) );
+ if( PARAM_INTERLACED )
+ CHECKED_MALLOC( frame->buffer_fld[0], 4*luma_plane_size * sizeof(pixel) );
for( int i = 0; i < 4; i++ )
+ {
frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+ frame->filtered_fld[i] = frame->buffer_fld[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+ }
frame->plane[0] = frame->filtered[0];
+ frame->plane_fld[0] = frame->filtered_fld[0];
}
else
{
CHECKED_MALLOC( frame->buffer[0], luma_plane_size * sizeof(pixel) );
+ if( PARAM_INTERLACED )
+ CHECKED_MALLOC( frame->buffer_fld[0], luma_plane_size * sizeof(pixel) );
frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
+ frame->filtered_fld[0] = frame->plane_fld[0] = frame->buffer_fld[0] + frame->i_stride[0] * i_padv + PADH;
}
frame->b_duplicate = 0;
@@ -139,12 +154,15 @@
}
CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
+ CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
if( h->param.analyse.i_me_method >= X264_ME_ESA )
{
CHECKED_MALLOC( frame->buffer[3],
frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
}
+ if( PARAM_INTERLACED )
+ CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
}
else /* fenc frame */
{
@@ -162,7 +180,7 @@
CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
}
- CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+3) * sizeof(uint16_t) );
+ CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
for( int j = 0; j <= h->param.i_bframe+1; j++ )
for( int i = 0; i <= h->param.i_bframe+1; i++ )
CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
@@ -198,7 +216,10 @@
if( !frame->b_duplicate )
{
for( int i = 0; i < 4; i++ )
+ {
x264_free( frame->buffer[i] );
+ x264_free( frame->buffer_fld[i] );
+ }
for( int i = 0; i < 4; i++ )
x264_free( frame->buffer_lowres[i] );
for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
@@ -219,6 +240,8 @@
x264_free( frame->i_inv_qscale_factor );
x264_free( frame->i_row_bits );
x264_free( frame->f_row_qp );
+ x264_free( frame->f_row_qscale );
+ x264_free( frame->field );
x264_free( frame->mb_type );
x264_free( frame->mb_partition );
x264_free( frame->mv[0] );
@@ -310,23 +333,56 @@
static void ALWAYS_INLINE pixel_memset( pixel *dst, pixel *src, int len, int size )
{
uint8_t *dstp = (uint8_t*)dst;
- if( size == 1 )
- memset(dst, *src, len);
- else if( size == 2 )
+ uint8_t v1 = *src;
+ uint16_t v2 = size == 1 ? v1 + (v1 << 8) : M16( src );
+ uint32_t v4 = size <= 2 ? v2 + (v2 << 16) : M32( src );
+ int i = 0;
+ len *= size;
+
+ /* Align the input pointer if it isn't already */
+ if( (intptr_t)dstp & (WORD_SIZE - 1) )
+ {
+ if( size <= 2 && ((intptr_t)dstp & 3) )
+ {
+ if( size == 1 && ((intptr_t)dstp & 1) )
+ dstp[i++] = v1;
+ if( (intptr_t)dstp & 2 )
+ {
+ M16( dstp+i ) = v2;
+ i += 2;
+ }
+ }
+ if( WORD_SIZE == 8 && (intptr_t)dstp & 4 )
+ {
+ M32( dstp+i ) = v4;
+ i += 4;
+ }
+ }
+
+ /* Main copy loop */
+ if( WORD_SIZE == 8 )
{
- int v = M16( src );
- for( int i = 0; i < len; i++ )
- M16( dstp+i*2 ) = v;
+ uint64_t v8 = v4 + ((uint64_t)v4<<32);
+ for( ; i < len - 7; i+=8 )
+ M64( dstp+i ) = v8;
}
- else if( size == 4 )
+ for( ; i < len - 3; i+=4 )
+ M32( dstp+i ) = v4;
+
+ /* Finish up the last few bytes */
+ if( size <= 2 )
{
- int v = M32( src );
- for( int i = 0; i < len; i++ )
- M32( dstp+i*4 ) = v;
+ if( i < len - 1 )
+ {
+ M16( dstp+i ) = v2;
+ i += 2;
+ }
+ if( size == 1 && i != len )
+ dstp[i] = v1;
}
}
-static void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
+static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
for( int y = 0; y < i_height; y++ )
@@ -350,26 +406,35 @@
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
int b_start = !mb_y;
- if( mb_y & h->sh.b_mbaff )
+ if( mb_y & SLICE_MBAFF )
return;
for( int i = 0; i < frame->i_plane; i++ )
{
int stride = frame->i_stride[i];
int width = 16*h->sps->i_mb_width;
- int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
+ int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> !!i;
int padh = PADH;
int padv = PADV >> !!i;
// buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
- pixel *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
if( b_end && !b_start )
- height += 4 >> (!!i + h->sh.b_mbaff);
- if( h->sh.b_mbaff )
+ height += 4 >> (!!i + SLICE_MBAFF);
+ pixel *pix;
+ if( SLICE_MBAFF )
{
+ // border samples for each field are extended separately
+ pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, i );
plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, i );
+
+ height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> !!i;
+ if( b_end && !b_start )
+ height += 4 >> (!!i);
+ pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
+ plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
}
else
{
+ pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
}
}
@@ -383,20 +448,22 @@
int b_start = !mb_y;
int stride = frame->i_stride[0];
int width = 16*h->mb.i_mb_width + 8;
- int height = b_end ? (16*(h->mb.i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
+ int height = b_end ? (16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF) + 16 : 16;
int padh = PADH - 4;
int padv = PADV - 8;
for( int i = 1; i < 4; i++ )
{
// buffer: 8 luma, to match the hpel filter
- pixel *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
- if( h->sh.b_mbaff )
+ pixel *pix;
+ if( SLICE_MBAFF )
{
+ pix = frame->filtered_fld[i] + (16*mb_y - 16) * stride - 4;
plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, 0 );
plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, 0 );
}
- else
- plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, 0 );
+
+ pix = frame->filtered[i] + (16*mb_y - 8) * stride - 4;
+ plane_expand_border( pix, stride, width, height << SLICE_MBAFF, padh, padv, b_start, b_end, 0 );
}
}
@@ -426,12 +493,28 @@
{
for( int y = i_height; y < i_height + i_pady; y++ )
memcpy( &frame->plane[i][y*frame->i_stride[i]],
- &frame->plane[i][(i_height-(~y&h->param.b_interlaced)-1)*frame->i_stride[i]],
+ &frame->plane[i][(i_height-(~y&PARAM_INTERLACED)-1)*frame->i_stride[i]],
(i_width + i_padx) * sizeof(pixel) );
}
}
}
+void x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y )
+{
+ for( int i = 0; i < h->fenc->i_plane; i++ )
+ {
+ int stride = h->fenc->i_stride[i];
+ int height = h->param.i_height >> !!i;
+ int pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> !!i;
+ int mbsize = (16>>!!i);
+ pixel *fenc = h->fenc->plane[i] + mbsize * mb_x;
+ for( int y = height; y < height + pady; y++ )
+ memcpy( fenc + y*stride,
+ fenc + (height-1)*stride,
+ mbsize * sizeof(pixel) );
+ }
+}
+
/* threading */
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
{
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/frame.h
^
|
@@ -72,13 +72,16 @@
int i_width_lowres;
int i_lines_lowres;
pixel *plane[2];
+ pixel *plane_fld[2];
pixel *filtered[4]; /* plane[0], H, V, HV */
+ pixel *filtered_fld[4];
pixel *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
uint16_t *integral;
/* for unrestricted mv we allocate more data than needed
* allocated data are stored in buffer */
pixel *buffer[4];
+ pixel *buffer_fld[4];
pixel *buffer_lowres[4];
x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
@@ -92,6 +95,7 @@
int16_t (*mv[2])[2];
int16_t (*mv16x16)[2];
int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
+ uint8_t *field;
/* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
* Doesn't need special addressing for intra cost because
@@ -117,6 +121,7 @@
int *i_row_satd;
int *i_row_bits;
float *f_row_qp;
+ float *f_row_qscale;
float *f_qp_offset;
float *f_qp_offset_aq;
int b_intra_calculated;
@@ -178,8 +183,8 @@
x264_deblock_intra_t deblock_luma_intra[2];
x264_deblock_intra_t deblock_chroma_intra[2];
void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
- int bframe );
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
+ int bframe, x264_t *h );
} x264_deblock_function_t;
x264_frame_t *x264_frame_new( x264_t *h, int b_fdec );
@@ -191,6 +196,7 @@
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end );
void x264_frame_expand_border_lowres( x264_frame_t *frame );
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame );
+void x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y );
void x264_frame_deblock_row( x264_t *h, int mb_y );
void x264_macroblock_deblock( x264_t *h );
@@ -198,7 +204,7 @@
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end );
void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame );
-void x264_deblock_init( int cpu, x264_deblock_function_t *pf );
+void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff );
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed );
void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/macroblock.c
^
|
@@ -40,7 +40,7 @@
mvx, mvy, 4*width, 4*height, &h->sh.weight[i_ref][0] );
// chroma is offset if MCing from a field of opposite parity
- if( h->mb.b_interlaced & i_ref )
+ if( MB_INTERLACED & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
@@ -69,7 +69,7 @@
h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
mvx, mvy, 4*width, 4*height, weight_none );
- if( h->mb.b_interlaced & i_ref )
+ if( MB_INTERLACED & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
@@ -101,9 +101,9 @@
h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
src0, i_stride0, src1, i_stride1, weight );
- if( h->mb.b_interlaced & i_ref0 )
+ if( MB_INTERLACED & i_ref0 )
mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
- if( h->mb.b_interlaced & i_ref1 )
+ if( MB_INTERLACED & i_ref1 )
mvy1 += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( tmp0, tmp0+8, 16, h->mb.pic.p_fref[0][i_ref0][4], h->mb.pic.i_stride[1],
@@ -212,7 +212,7 @@
h->mb.i_b8_stride = h->mb.i_mb_width * 2;
h->mb.i_b4_stride = h->mb.i_mb_width * 4;
- h->mb.b_interlaced = h->param.b_interlaced;
+ h->mb.b_interlaced = PARAM_INTERLACED;
CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
@@ -236,7 +236,7 @@
for( int i = 0; i < 2; i++ )
{
- int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
+ int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
@@ -250,7 +250,7 @@
if( h->param.analyse.i_weighted_pred )
{
- int i_padv = PADV << h->param.b_interlaced;
+ int i_padv = PADV << PARAM_INTERLACED;
int luma_plane_size = 0;
int numweightbuf;
@@ -314,18 +314,22 @@
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
- for( int i = 0; i <= h->param.b_interlaced; i++ )
- {
+ {
+ for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int j = 0; j < 2; j++ )
{
/* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
h->intra_border_backup[i][j] += 16;
- h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
+ if( !PARAM_INTERLACED )
+ h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
}
+ for( int i = 0; i <= PARAM_INTERLACED; i++ )
+ {
CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
h->deblock_strength[1] = h->deblock_strength[i];
}
+ }
/* Allocate scratch buffer */
int scratch_size = 0;
@@ -338,7 +342,7 @@
((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
}
- int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+3)&~3) * sizeof(int);
+ int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
scratch_size = X264_MAX( scratch_size, buf_mbtree );
if( scratch_size )
CHECKED_MALLOC( h->scratch_buffer, scratch_size );
@@ -353,12 +357,13 @@
void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
- for( int i = 0; i <= h->param.b_interlaced; i++ )
- {
+ {
+ for( int i = 0; i <= PARAM_INTERLACED; i++ )
x264_free( h->deblock_strength[i] );
+ for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int j = 0; j < 2; j++ )
x264_free( h->intra_border_backup[i][j] - 16 );
- }
+ }
x264_free( h->scratch_buffer );
}
@@ -371,6 +376,7 @@
h->mb.ref[1] = h->fdec->ref[1];
h->mb.type = h->fdec->mb_type;
h->mb.partition = h->fdec->mb_partition;
+ h->mb.field = h->fdec->field;
h->fdec->i_ref[0] = h->i_ref[0];
h->fdec->i_ref[1] = h->i_ref[1];
@@ -403,12 +409,12 @@
{
deblock_ref_table(-2) = -2;
deblock_ref_table(-1) = -1;
- for( int i = 0; i < h->i_ref[0] << h->sh.b_mbaff; i++ )
+ for( int i = 0; i < h->i_ref[0] << SLICE_MBAFF; i++ )
{
/* Mask off high bits to avoid frame num collisions with -1/-2.
* In current x264 frame num values don't cover a range of more
* than 32, so 6 bits is enough for uniqueness. */
- if( !h->mb.b_interlaced )
+ if( !MB_INTERLACED )
deblock_ref_table(i) = h->fref[0][i]->i_frame_num&63;
else
deblock_ref_table(i) = ((h->fref[0][i>>1]->i_frame_num&63)<<1) + (i&1);
@@ -420,7 +426,7 @@
memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) );
if( h->i_ref[0] > 0 )
- for( int field = 0; field <= h->sh.b_mbaff; field++ )
+ for( int field = 0; field <= SLICE_MBAFF; field++ )
{
int curpoc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
int refpoc = h->fref[0][0]->i_poc + h->fref[0][0]->i_delta_poc[field];
@@ -452,7 +458,7 @@
(h->sh.i_type == SLICE_TYPE_B && h->mb.i_subpel_refine >= 9));
h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
(h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
-
+ h->mb.i_mb_prev_xy = -1;
/* fdec: fenc:
* yyyyyyy
@@ -489,18 +495,20 @@
dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE];
}
-static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_interlaced )
+static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_mbaff )
{
int w = (i ? 8 : 16);
int i_stride = h->fdec->i_stride[i];
- int i_stride2 = i_stride << b_interlaced;
- int i_pix_offset = b_interlaced
+ int i_stride2 = i_stride << MB_INTERLACED;
+ int i_pix_offset = MB_INTERLACED
? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + w * mb_y * i_stride;
pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
+ int fdec_idx = b_mbaff ? (MB_INTERLACED ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
+ pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
- if( b_interlaced )
+ /* ref_pix_offset[0] references the current field and [1] the opposite field. */
+ if( MB_INTERLACED )
ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
h->mb.pic.i_stride[i] = i_stride2;
h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
@@ -509,13 +517,20 @@
h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
+ if( b_mbaff )
+ {
+ h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
+ h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
+ }
}
else
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fenc_plane[0], i_stride2, 16 );
memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
+ if( b_mbaff )
+ h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = intra_fdec[-1];
}
- if( b_interlaced )
+ if( b_mbaff )
{
for( int j = 0; j < w; j++ )
if( i )
@@ -526,15 +541,28 @@
else
h->mb.pic.p_fdec[0][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
}
+ pixel *plane_src, **filtered_src;
for( int j = 0; j < h->mb.pic.i_fref[0]; j++ )
{
- h->mb.pic.p_fref[0][j][i?4:0] = &h->fref[0][j >> b_interlaced]->plane[i][ref_pix_offset[j&1]];
+ // Interpolate between pixels in same field.
+ if( MB_INTERLACED )
+ {
+ plane_src = h->fref[0][j>>1]->plane_fld[i];
+ filtered_src = h->fref[0][j>>1]->filtered_fld;
+ }
+ else
+ {
+ plane_src = h->fref[0][j]->plane[i];
+ filtered_src = h->fref[0][j]->filtered;
+ }
+ h->mb.pic.p_fref[0][j][i?4:0] = plane_src + ref_pix_offset[j&1];
+
if( !i )
{
for( int k = 1; k < 4; k++ )
- h->mb.pic.p_fref[0][j][k] = &h->fref[0][j >> b_interlaced]->filtered[k][ref_pix_offset[j&1]];
+ h->mb.pic.p_fref[0][j][k] = filtered_src[k] + ref_pix_offset[j&1];
if( h->sh.weight[j][0].weightfn )
- h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> b_interlaced][ref_pix_offset[j&1]];
+ h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> MB_INTERLACED][ref_pix_offset[j&1]];
else
h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
}
@@ -542,57 +570,160 @@
if( h->sh.i_type == SLICE_TYPE_B )
for( int j = 0; j < h->mb.pic.i_fref[1]; j++ )
{
- h->mb.pic.p_fref[1][j][i?4:0] = &h->fref[1][j >> b_interlaced]->plane[i][ref_pix_offset[j&1]];
+ if( MB_INTERLACED )
+ {
+ plane_src = h->fref[1][j>>1]->plane_fld[i];
+ filtered_src = h->fref[1][j>>1]->filtered_fld;
+ }
+ else
+ {
+ plane_src = h->fref[1][j]->plane[i];
+ filtered_src = h->fref[1][j]->filtered;
+ }
+ h->mb.pic.p_fref[1][j][i?4:0] = plane_src + ref_pix_offset[j&1];
+
if( !i )
for( int k = 1; k < 4; k++ )
- h->mb.pic.p_fref[1][j][k] = &h->fref[1][j >> b_interlaced]->filtered[k][ref_pix_offset[j&1]];
+ h->mb.pic.p_fref[1][j][k] = filtered_src[k] + ref_pix_offset[j&1];
}
}
-static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
+static const x264_left_table_t left_indices[4] =
{
- int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
+ /* Current is progressive */
+ {{ 4, 4, 5, 5}, { 3, 3, 7, 7}, {16+1, 16+1, 16+4+1, 16+4+1}, {0, 0, 1, 1}, {0, 0, 0, 0}},
+ {{ 6, 6, 3, 3}, {11, 11, 15, 15}, {16+3, 16+3, 16+4+3, 16+4+3}, {2, 2, 3, 3}, {1, 1, 1, 1}},
+ /* Current is interlaced */
+ {{ 4, 6, 4, 6}, { 3, 11, 3, 11}, {16+1, 16+1, 16+4+1, 16+4+1}, {0, 2, 0, 2}, {0, 1, 0, 1}},
+ /* Both same */
+ {{ 4, 5, 6, 3}, { 3, 7, 11, 15}, {16+1, 16+3, 16+4+1, 16+4+3}, {0, 1, 2, 3}, {0, 0, 1, 1}}
+};
+
+static void ALWAYS_INLINE x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
+{
+ const int mb_interlaced = b_interlaced && MB_INTERLACED;
+ int top_y = mb_y - (1 << mb_interlaced);
+ int top = top_y * h->mb.i_mb_stride + mb_x;
h->mb.i_mb_x = mb_x;
h->mb.i_mb_y = mb_y;
h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
+ h->mb.left_b8[0] =
+ h->mb.left_b8[1] = -1;
+ h->mb.left_b4[0] =
+ h->mb.left_b4[1] = -1;
h->mb.i_neighbour = 0;
h->mb.i_neighbour_intra = 0;
h->mb.i_neighbour_frame = 0;
h->mb.i_mb_top_xy = -1;
- h->mb.i_mb_left_xy = -1;
+ h->mb.i_mb_top_y = -1;
+ h->mb.i_mb_left_xy[0] = h->mb.i_mb_left_xy[1] = -1;
h->mb.i_mb_topleft_xy = -1;
h->mb.i_mb_topright_xy = -1;
h->mb.i_mb_type_top = -1;
- h->mb.i_mb_type_left = -1;
+ h->mb.i_mb_type_left[0] = h->mb.i_mb_type_left[1] = -1;
h->mb.i_mb_type_topleft = -1;
h->mb.i_mb_type_topright = -1;
+ h->mb.left_index_table = &left_indices[3];
+ h->mb.topleft_partition = 0;
+
+ int topleft_y = top_y;
+ int topright_y = top_y;
+ int left[2];
+
+ left[0] = left[1] = h->mb.i_mb_xy - 1;
+ h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
+ h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;
+
+ if( b_interlaced )
+ {
+ h->mb.i_mb_top_mbpair_xy = h->mb.i_mb_xy - 2*h->mb.i_mb_stride;
+ h->mb.i_mb_topleft_y = -1;
+ h->mb.i_mb_topright_y = -1;
+
+ if( mb_y&1 )
+ {
+ if( mb_x && mb_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
+ {
+ left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
+ h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
+ h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;
+
+ if( mb_interlaced )
+ {
+ h->mb.left_index_table = &left_indices[2];
+ left[1] += h->mb.i_mb_stride;
+ h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
+ h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
+ }
+ else
+ {
+ h->mb.left_index_table = &left_indices[1];
+ topleft_y++;
+ h->mb.topleft_partition = 1;
+ }
+ }
+ if( !mb_interlaced )
+ topright_y = -1;
+ }
+ else
+ {
+ if( mb_interlaced && top >= 0 )
+ {
+ if( !h->mb.field[top] )
+ {
+ top += h->mb.i_mb_stride;
+ top_y++;
+ }
+ if( mb_x )
+ topleft_y += !h->mb.field[h->mb.i_mb_stride*topleft_y + mb_x - 1];
+ if( mb_x < h->mb.i_mb_width-1 )
+ topright_y += !h->mb.field[h->mb.i_mb_stride*topright_y + mb_x + 1];
+ }
+ if( mb_x && mb_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
+ {
+ if( mb_interlaced )
+ {
+ h->mb.left_index_table = &left_indices[2];
+ left[1] += h->mb.i_mb_stride;
+ h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
+ h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
+ }
+ else
+ h->mb.left_index_table = &left_indices[0];
+ }
+ }
+ }
if( mb_x > 0 )
{
h->mb.i_neighbour_frame |= MB_LEFT;
- h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
- h->mb.i_mb_type_left = h->mb.type[h->mb.i_mb_left_xy];
- if( h->mb.i_mb_xy > h->sh.i_first_mb )
+ h->mb.i_mb_left_xy[0] = left[0];
+ h->mb.i_mb_left_xy[1] = left[1];
+ h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
+ h->mb.i_mb_type_left[1] = h->mb.type[h->mb.i_mb_left_xy[1]];
+ if( h->mb.slice_table[left[0]] == h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_LEFT;
- if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left ) )
+ // FIXME: We don't currently support constrained intra + mbaff.
+ if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
h->mb.i_neighbour_intra |= MB_LEFT;
}
}
/* We can't predict from the previous threadslice since it hasn't been encoded yet. */
- if( (h->i_threadslice_start >> h->mb.b_interlaced) != (mb_y >> h->mb.b_interlaced) )
+ if( (h->i_threadslice_start >> mb_interlaced) != (mb_y >> mb_interlaced) )
{
if( top >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOP;
h->mb.i_mb_top_xy = top;
+ h->mb.i_mb_top_y = top_y;
h->mb.i_mb_type_top = h->mb.type[h->mb.i_mb_top_xy];
- if( top >= h->sh.i_first_mb )
+ if( h->mb.slice_table[top] == h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOP;
@@ -611,12 +742,13 @@
}
}
- if( mb_x > 0 && top - 1 >= 0 )
+ if( mb_x > 0 && topleft_y >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOPLEFT;
- h->mb.i_mb_topleft_xy = top - 1;
+ h->mb.i_mb_topleft_xy = h->mb.i_mb_stride*topleft_y + mb_x - 1;
+ h->mb.i_mb_topleft_y = topleft_y;
h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
- if( top - 1 >= h->sh.i_first_mb )
+ if( h->mb.slice_table[h->mb.i_mb_topleft_xy] == h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOPLEFT;
@@ -625,12 +757,13 @@
}
}
- if( mb_x < h->mb.i_mb_width - 1 && top + 1 >= 0 )
+ if( mb_x < h->mb.i_mb_width - 1 && topright_y >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOPRIGHT;
- h->mb.i_mb_topright_xy = top + 1;
+ h->mb.i_mb_topright_xy = h->mb.i_mb_stride*topright_y + mb_x + 1;
+ h->mb.i_mb_topright_y = topright_y;
h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
- if( top + 1 >= h->sh.i_first_mb )
+ if( h->mb.slice_table[h->mb.i_mb_topright_xy] == h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOPRIGHT;
@@ -641,13 +774,20 @@
}
}
-void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
+#define LTOP 0
+#if HAVE_INTERLACED
+# define LBOT 1
+#else
+# define LBOT 0
+#endif
+
+void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
{
- x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
+ x264_macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff );
- int left = h->mb.i_mb_left_xy;
+ int *left = h->mb.i_mb_left_xy;
int top = h->mb.i_mb_top_xy;
- int top_y = mb_y - (1 << h->mb.b_interlaced);
+ int top_y = h->mb.i_mb_top_y;
int s8x8 = h->mb.i_b8_stride;
int s4x4 = h->mb.i_b4_stride;
int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
@@ -660,6 +800,8 @@
uint8_t (*nnz)[24] = h->mb.non_zero_count;
int16_t *cbp = h->mb.cbp;
+ const x264_left_table_t *left_index_table = h->mb.left_index_table;
+
/* load cache */
if( h->mb.i_neighbour & MB_TOP )
{
@@ -699,25 +841,53 @@
if( h->mb.i_neighbour & MB_LEFT )
{
- h->mb.cache.i_cbp_left = cbp[left];
+ if( b_mbaff )
+ {
+ const int16_t top_luma = (cbp[left[LTOP]] >> (left_index_table->mv[0]&(~1))) & 2;
+ const int16_t bot_luma = (cbp[left[LBOT]] >> (left_index_table->mv[2]&(~1))) & 2;
+ h->mb.cache.i_cbp_left = (cbp[left[LTOP]] & 0xfff0) | (bot_luma<<2) | top_luma;
+ }
+ else
+ h->mb.cache.i_cbp_left = cbp[left[0]];
+ if( b_mbaff )
+ {
+ /* load intra4x4 */
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[LTOP]][left_index_table->intra[0]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[LTOP]][left_index_table->intra[1]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[LBOT]][left_index_table->intra[2]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[LBOT]][left_index_table->intra[3]];
+
+ /* load non_zero_count */
+ h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[LTOP]][left_index_table->nnz[0]];
+ h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[LTOP]][left_index_table->nnz[1]];
+ h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[LBOT]][left_index_table->nnz[2]];
+ h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[LBOT]][left_index_table->nnz[3]];
- /* load intra4x4 */
- h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][4];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][5];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][6];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][3];
+ h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[LTOP]][left_index_table->nnz_chroma[0]];
+ h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[LBOT]][left_index_table->nnz_chroma[1]];
- /* load non_zero_count */
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
- h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
+ h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[LTOP]][left_index_table->nnz_chroma[2]];
+ h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[LBOT]][left_index_table->nnz_chroma[3]];
+ }
+ else
+ {
+ int l = left[0];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[l][left_index_table->intra[0]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[l][left_index_table->intra[1]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[l][left_index_table->intra[2]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[l][left_index_table->intra[3]];
+
+ h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[l][left_index_table->nnz[0]];
+ h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[l][left_index_table->nnz[1]];
+ h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[l][left_index_table->nnz[2]];
+ h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[l][left_index_table->nnz[3]];
- h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][16+1];
- h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][16+3];
+ h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[l][left_index_table->nnz_chroma[0]];
+ h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[l][left_index_table->nnz_chroma[1]];
- h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][16+4+1];
- h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][16+4+3];
+ h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[l][left_index_table->nnz_chroma[2]];
+ h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[l][left_index_table->nnz_chroma[3]];
+ }
}
else
{
@@ -742,20 +912,17 @@
if( h->pps->b_transform_8x8_mode )
{
h->mb.cache.i_neighbour_transform_size =
- ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
+ ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left[0]] )
+ ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
}
- if( h->sh.b_mbaff )
+ if( b_mbaff )
{
- h->mb.pic.i_fref[0] = h->i_ref[0] << h->mb.b_interlaced;
- h->mb.pic.i_fref[1] = h->i_ref[1] << h->mb.b_interlaced;
- h->mb.cache.i_neighbour_interlaced =
- !!(h->mb.i_neighbour & MB_LEFT)
- + !!(h->mb.i_neighbour & MB_TOP);
+ h->mb.pic.i_fref[0] = h->i_ref[0] << MB_INTERLACED;
+ h->mb.pic.i_fref[1] = h->i_ref[1] << MB_INTERLACED;
}
- if( !h->mb.b_interlaced )
+ if( !b_mbaff )
{
x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
@@ -789,8 +956,17 @@
int i8 = x264_scan8[0] - 1 - 1*8;
if( h->mb.i_neighbour & MB_TOPLEFT )
{
- h->mb.cache.ref[l][i8] = ref[top_8x8 - 1];
- CP32( h->mb.cache.mv[l][i8], mv[top_4x4 - 1] );
+ int ir = b_mbaff ? 2*(s8x8*h->mb.i_mb_topleft_y + mb_x-1)+1+s8x8 : top_8x8 - 1;
+ int iv = b_mbaff ? 4*(s4x4*h->mb.i_mb_topleft_y + mb_x-1)+3+3*s4x4 : top_4x4 - 1;
+ if( b_mbaff && h->mb.topleft_partition )
+ {
+ /* Take motion vector from the middle of macroblock instead of
+ * the bottom right as usual. */
+ iv -= 2*s4x4;
+ ir -= s8x8;
+ }
+ h->mb.cache.ref[l][i8] = ref[ir];
+ CP32( h->mb.cache.mv[l][i8], mv[iv] );
}
else
{
@@ -816,8 +992,10 @@
i8 = x264_scan8[0] + 4 - 1*8;
if( h->mb.i_neighbour & MB_TOPRIGHT )
{
- h->mb.cache.ref[l][i8] = ref[top_8x8 + 2];
- CP32( h->mb.cache.mv[l][i8], mv[top_4x4 + 4] );
+ int ir = b_mbaff ? 2*(s8x8*h->mb.i_mb_topright_y + (mb_x+1))+s8x8 : top_8x8 + 2;
+ int iv = b_mbaff ? 4*(s4x4*h->mb.i_mb_topright_y + (mb_x+1))+3*s4x4 : top_4x4 + 4;
+ h->mb.cache.ref[l][i8] = ref[ir];
+ CP32( h->mb.cache.mv[l][i8], mv[iv] );
}
else
h->mb.cache.ref[l][i8] = -2;
@@ -825,17 +1003,32 @@
i8 = x264_scan8[0] - 1;
if( h->mb.i_neighbour & MB_LEFT )
{
- const int ir = h->mb.i_b8_xy - 1;
- const int iv = h->mb.i_b4_xy - 1;
- h->mb.cache.ref[l][i8+0*8] =
- h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
- h->mb.cache.ref[l][i8+2*8] =
- h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
-
- CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
- CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
- CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
- CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
+ if( b_mbaff )
+ {
+ h->mb.cache.ref[l][i8+0*8] = ref[h->mb.left_b8[LTOP] + 1 + s8x8*left_index_table->ref[0]];
+ h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[LTOP] + 1 + s8x8*left_index_table->ref[1]];
+ h->mb.cache.ref[l][i8+2*8] = ref[h->mb.left_b8[LBOT] + 1 + s8x8*left_index_table->ref[2]];
+ h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[LBOT] + 1 + s8x8*left_index_table->ref[3]];
+
+ CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[LTOP] + 3 + s4x4*left_index_table->mv[0]] );
+ CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[LTOP] + 3 + s4x4*left_index_table->mv[1]] );
+ CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[LBOT] + 3 + s4x4*left_index_table->mv[2]] );
+ CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[LBOT] + 3 + s4x4*left_index_table->mv[3]] );
+ }
+ else
+ {
+ const int ir = h->mb.i_b8_xy - 1;
+ const int iv = h->mb.i_b4_xy - 1;
+ h->mb.cache.ref[l][i8+0*8] =
+ h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
+ h->mb.cache.ref[l][i8+2*8] =
+ h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
+
+ CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
+ CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
+ CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
+ CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
+ }
}
else
{
@@ -846,6 +1039,39 @@
}
}
+ /* Extra logic for top right mv in mbaff.
+ * . . . d . . a .
+ * . . . e . . . .
+ * . . . f b . c .
+ * . . . . . . . .
+ *
+ * If the top right of the 4x4 partitions labeled a, b and c in the
+ * above diagram do not exist, but the entries d, e and f exist (in
+ * the macroblock to the left) then use those instead.
+ */
+ if( b_mbaff && (h->mb.i_neighbour & MB_LEFT) )
+ {
+ if( MB_INTERLACED && !h->mb.field[h->mb.i_mb_xy-1] )
+ {
+ h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*0];
+ h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*1];
+ h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[1] + 1 + s8x8*0];
+ CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table->mv[0]+1)] );
+ CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table->mv[1]+1)] );
+ CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[1] + 3 + s4x4*(left_index_table->mv[2]+1)] );
+ }
+ else if( !MB_INTERLACED && h->mb.field[h->mb.i_mb_xy-1] )
+ {
+ // Looking at the bottom field so always take the bottom macroblock of the pair.
+ h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
+ h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
+ h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
+ CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
+ CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
+ CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[2]] );
+ }
+ }
+
if( h->param.b_cabac )
{
uint8_t (*mvd)[8][2] = h->mb.mvd[l];
@@ -854,31 +1080,169 @@
else
M64( h->mb.cache.mvd[l][x264_scan8[0] - 8] ) = 0;
+ if( h->mb.i_neighbour & MB_LEFT && (!b_mbaff || h->mb.cache.ref[l][x264_scan8[0]-1] >= 0) )
+ {
+ CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[LTOP]][left_index_table->intra[0]] );
+ CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[LTOP]][left_index_table->intra[1]] );
+ }
+ else
+ {
+ M16( h->mb.cache.mvd[l][x264_scan8[0]-1+0*8] ) = 0;
+ M16( h->mb.cache.mvd[l][x264_scan8[0]-1+1*8] ) = 0;
+ }
+ if( h->mb.i_neighbour & MB_LEFT && (!b_mbaff || h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0) )
+ {
+ CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[LBOT]][left_index_table->intra[2]] );
+ CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[LBOT]][left_index_table->intra[3]] );
+ }
+ else
+ {
+ M16( h->mb.cache.mvd[l][x264_scan8[0]-1+2*8] ) = 0;
+ M16( h->mb.cache.mvd[l][x264_scan8[0]-1+3*8] ) = 0;
+ }
+ }
+
+ /* If motion vectors are cached from frame macroblocks but this
+ * macroblock is a field macroblock then the motion vector must be
+ * halved. Similarly, motion vectors from field macroblocks are doubled. */
+ if( b_mbaff )
+ {
+#define MAP_MVS\
+ if( FIELD_DIFFERENT(h->mb.i_mb_topleft_xy) )\
+ MAP_F2F(mv, ref, x264_scan8[0] - 1 - 1*8)\
+ if( FIELD_DIFFERENT(top) )\
+ {\
+ MAP_F2F(mv, ref, x264_scan8[0] + 0 - 1*8)\
+ MAP_F2F(mv, ref, x264_scan8[0] + 1 - 1*8)\
+ MAP_F2F(mv, ref, x264_scan8[0] + 2 - 1*8)\
+ MAP_F2F(mv, ref, x264_scan8[0] + 3 - 1*8)\
+ }\
+ if( FIELD_DIFFERENT(h->mb.i_mb_topright_xy) )\
+ MAP_F2F(mv, ref, x264_scan8[0] + 4 - 1*8)\
+ if( FIELD_DIFFERENT(left[0]) )\
+ {\
+ MAP_F2F(mv, ref, x264_scan8[0] - 1 + 0*8)\
+ MAP_F2F(mv, ref, x264_scan8[0] - 1 + 1*8)\
+ MAP_F2F(mv, ref, x264_scan8[0] - 1 + 2*8)\
+ MAP_F2F(mv, ref, x264_scan8[0] - 1 + 3*8)\
+ MAP_F2F(topright_mv, topright_ref, 0)\
+ MAP_F2F(topright_mv, topright_ref, 1)\
+ MAP_F2F(topright_mv, topright_ref, 2)\
+ }
+
+ if( MB_INTERLACED )
+ {
+#define FIELD_DIFFERENT(macroblock) (macroblock >= 0 && !h->mb.field[macroblock])
+#define MAP_F2F(varmv, varref, index)\
+ if( h->mb.cache.varref[l][index] >= 0 )\
+ {\
+ h->mb.cache.varref[l][index] <<= 1;\
+ h->mb.cache.varmv[l][index][1] /= 2;\
+ h->mb.cache.mvd[l][index][1] >>= 1;\
+ }
+ MAP_MVS
+#undef MAP_F2F
+#undef FIELD_DIFFERENT
+ }
+ else
+ {
+#define FIELD_DIFFERENT(macroblock) (macroblock >= 0 && h->mb.field[macroblock])
+#define MAP_F2F(varmv, varref, index)\
+ if( h->mb.cache.varref[l][index] >= 0 )\
+ {\
+ h->mb.cache.varref[l][index] >>= 1;\
+ h->mb.cache.varmv[l][index][1] <<= 1;\
+ h->mb.cache.mvd[l][index][1] <<= 1;\
+ }
+ MAP_MVS
+#undef MAP_F2F
+#undef FIELD_DIFFERENT
+ }
+ }
+ }
+
+ if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
+ h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+
+ /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
+ * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
+ h->mb.b_allow_skip = 1;
+ if( b_mbaff )
+ {
+ if( MB_INTERLACED != h->mb.field_decoding_flag &&
+ h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+ h->mb.b_allow_skip = 0;
+ if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
+ {
if( h->mb.i_neighbour & MB_LEFT )
{
- CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][4] );
- CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][5] );
- CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][6] );
- CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][3] );
+ if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
+ h->mb.b_allow_skip = 0;
+ }
+ else if( h->mb.i_neighbour & MB_TOP )
+ {
+ if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
+ h->mb.b_allow_skip = 0;
+ }
+ else // Frame mb pair is predicted
+ {
+ if( MB_INTERLACED )
+ h->mb.b_allow_skip = 0;
+ }
+ }
+ }
+
+ if( h->param.b_cabac )
+ {
+ if( b_mbaff )
+ {
+ int left_xy, top_xy;
+ /* Neighbours here are calculated based on field_decoding_flag */
+ int mb_xy = mb_x + (mb_y&~1)*h->mb.i_mb_stride;
+ left_xy = mb_xy - 1;
+ if( (mb_y&1) && mb_x > 0 && h->mb.field_decoding_flag == h->mb.field[left_xy] )
+ left_xy += h->mb.i_mb_stride;
+ if( h->mb.field_decoding_flag )
+ {
+ top_xy = mb_xy - h->mb.i_mb_stride;
+ if( !(mb_y&1) && top_xy >= 0 && h->mb.slice_table[top_xy] == h->sh.i_first_mb && h->mb.field[top_xy] )
+ top_xy -= h->mb.i_mb_stride;
}
else
- for( int i = 0; i < 4; i++ )
- M16( h->mb.cache.mvd[l][x264_scan8[0]-1+i*8] ) = 0;
+ top_xy = mb_x + (mb_y-1)*h->mb.i_mb_stride;
+
+ h->mb.cache.i_neighbour_skip = (mb_x > 0 && h->mb.slice_table[left_xy] == h->sh.i_first_mb && !IS_SKIP( h->mb.type[left_xy] ))
+ + (top_xy >= 0 && h->mb.slice_table[top_xy] == h->sh.i_first_mb && !IS_SKIP( h->mb.type[top_xy] ));
+ }
+ else
+ {
+ h->mb.cache.i_neighbour_skip = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
+ + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ));
}
}
/* load skip */
if( h->sh.i_type == SLICE_TYPE_B )
{
- h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced&(mb_y&1)];
- h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(mb_y&1)];
+ h->mb.bipred_weight = h->mb.bipred_weight_buf[MB_INTERLACED][MB_INTERLACED&(mb_y&1)];
+ h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[MB_INTERLACED][MB_INTERLACED&(mb_y&1)];
if( h->param.b_cabac )
{
uint8_t skipbp;
x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
- skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left] : 0;
- h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
- h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
+ if( b_mbaff )
+ {
+ skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[LTOP]] : 0;
+ h->mb.cache.skip[x264_scan8[0] - 1] = (skipbp >> (1+(left_index_table->mv[0]&~1))) & 1;
+ skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[LBOT]] : 0;
+ h->mb.cache.skip[x264_scan8[8] - 1] = (skipbp >> (1+(left_index_table->mv[2]&~1))) & 1;
+ }
+ else
+ {
+ skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
+ h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
+ h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
+ }
skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
h->mb.cache.skip[x264_scan8[0] - 8] = skipbp & 0x4;
h->mb.cache.skip[x264_scan8[4] - 8] = skipbp & 0x8;
@@ -902,36 +1266,67 @@
| ((h->mb.i_neighbour_intra & MB_TOP) ? MB_TOP|MB_TOPLEFT : 0);
}
+void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y )
+{
+ x264_macroblock_cache_load( h, mb_x, mb_y, 0 );
+}
+
+void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y )
+{
+ x264_macroblock_cache_load( h, mb_x, mb_y, 1 );
+}
+
void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
{
int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;
h->mb.i_neighbour = 0;
h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
-
- if( mb_x > 0 )
+ h->mb.b_interlaced = PARAM_INTERLACED && h->mb.field[h->mb.i_mb_xy];
+ h->mb.i_mb_top_y = mb_y - (1 << MB_INTERLACED);
+ h->mb.i_mb_top_xy = mb_x + h->mb.i_mb_stride*h->mb.i_mb_top_y;
+ h->mb.i_mb_left_xy[1] =
+ h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
+ if( SLICE_MBAFF )
{
- h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
- if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
- h->mb.i_neighbour |= MB_LEFT;
+ if( mb_y&1 )
+ {
+ if( mb_x && h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
+ h->mb.i_mb_left_xy[0] -= h->mb.i_mb_stride;
+ }
+ else
+ {
+ if( h->mb.i_mb_top_xy >= 0 && MB_INTERLACED && !h->mb.field[h->mb.i_mb_top_xy] )
+ {
+ h->mb.i_mb_top_xy += h->mb.i_mb_stride;
+ h->mb.i_mb_top_y++;
+ }
+ if( mb_x && h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
+ h->mb.i_mb_left_xy[1] += h->mb.i_mb_stride;
+ }
}
- if( mb_y > h->mb.b_interlaced )
- {
- h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced);
- if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
- h->mb.i_neighbour |= MB_TOP;
- }
+ if( mb_x > 0 && (deblock_on_slice_edges ||
+ h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy]) )
+ h->mb.i_neighbour |= MB_LEFT;
+ if( mb_y > MB_INTERLACED && (deblock_on_slice_edges
+ || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy]) )
+ h->mb.i_neighbour |= MB_TOP;
}
-void x264_macroblock_cache_load_deblock( x264_t *h )
+void x264_macroblock_deblock_strength( x264_t *h )
{
+ uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
- return;
+ {
+ memset( bs[0], 3, 4*4*sizeof(uint8_t) );
+ memset( bs[1], 3, 4*4*sizeof(uint8_t) );
+ if( !SLICE_MBAFF ) return;
+ }
/* If we have multiple slices and we're deblocking on slice edges, we
* have to reload neighbour data. */
- if( h->sh.i_first_mb && h->sh.i_disable_deblocking_filter_idc != 2 )
+ if( SLICE_MBAFF || (h->sh.i_first_mb && h->sh.i_disable_deblocking_filter_idc != 2) )
{
int old_neighbour = h->mb.i_neighbour;
int mb_x = h->mb.i_mb_x;
@@ -941,24 +1336,25 @@
h->mb.i_neighbour &= ~old_neighbour;
if( h->mb.i_neighbour )
{
- int top_y = mb_y - (1 << h->mb.b_interlaced);
+ int top_y = h->mb.i_mb_top_y;
int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
int s8x8 = h->mb.i_b8_stride;
int s4x4 = h->mb.i_b4_stride;
uint8_t (*nnz)[24] = h->mb.non_zero_count;
+ const x264_left_table_t *left_index_table = SLICE_MBAFF ? h->mb.left_index_table : &left_indices[3];
if( h->mb.i_neighbour & MB_TOP )
CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
if( h->mb.i_neighbour & MB_LEFT )
{
- int left = h->mb.i_mb_left_xy;
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
- h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
+ int *left = h->mb.i_mb_left_xy;
+ h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table->nnz[0]];
+ h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table->nnz[1]];
+ h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table->nnz[2]];
+ h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table->nnz[3]];
}
for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
@@ -979,17 +1375,15 @@
i8 = x264_scan8[0] - 1;
if( h->mb.i_neighbour & MB_LEFT )
{
- int ir = h->mb.i_b8_xy - 1;
- int iv = h->mb.i_b4_xy - 1;
h->mb.cache.ref[l][i8+0*8] =
- h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
+ h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*left_index_table->ref[0]];
h->mb.cache.ref[l][i8+2*8] =
- h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
+ h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*left_index_table->ref[2]];
- CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
- CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
- CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
- CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
+ CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table->mv[0]] );
+ CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table->mv[1]] );
+ CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table->mv[2]] );
+ CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table->mv[3]] );
}
}
}
@@ -1029,7 +1423,7 @@
{
uint8_t (*nnz)[24] = h->mb.non_zero_count;
int top = h->mb.i_mb_top_xy;
- int left = h->mb.i_mb_left_xy;
+ int *left = h->mb.i_mb_left_xy;
if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
{
@@ -1040,15 +1434,21 @@
M16( &h->mb.cache.non_zero_count[i8+2] ) = nnz_top1 ? 0x0101 : 0;
}
- if( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
+ if( h->mb.i_neighbour & MB_LEFT )
{
int i8 = x264_scan8[0] - 1;
- int nnz_left0 = M16( &nnz[left][2] ) | M16( &nnz[left][6] );
- int nnz_left1 = M16( &nnz[left][10] ) | M16( &nnz[left][14] );
- h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
- h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
- h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
- h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
+ if( h->mb.mb_transform_size[left[0]] )
+ {
+ int nnz_left0 = M16( &nnz[left[0]][2] ) | M16( &nnz[left[0]][6] );
+ h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
+ h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
+ }
+ if( h->mb.mb_transform_size[left[1]] )
+ {
+ int nnz_left1 = M16( &nnz[left[1]][10] ) | M16( &nnz[left[1]][14] );
+ h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
+ h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
+ }
}
if( h->mb.mb_transform_size[h->mb.i_mb_xy] )
@@ -1066,43 +1466,55 @@
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*3] ) = nnzbot;
}
}
-}
-static void ALWAYS_INLINE twiddle_topleft_pixel( pixel *dst, pixel *src, int b_interlaced )
-{
- // We update intra_border_backup in-place, so the topleft neighbor will no longer
- // exist there when load_pic_pointers wants it. Move it within p_fdec instead.
- if( b_interlaced )
- {
- dst[0] = dst[-1];
- dst[-1] = src[0];
- }
- else
- dst[0] = src[0];
+ int mvy_limit = 4 >> MB_INTERLACED;
+ h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
+ bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, h );
}
-static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_interlaced )
+static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_mbaff )
{
int w = i ? 8 : 16;
int i_stride = h->fdec->i_stride[i];
- int i_stride2 = i_stride << b_interlaced;
- int i_pix_offset = b_interlaced
+ int i_stride2 = i_stride << (b_mbaff && MB_INTERLACED);
+ int i_pix_offset = (b_mbaff && MB_INTERLACED)
? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + w * mb_y * i_stride;
- pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
if( i )
- {
h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
- memcpy( intra_fdec, h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, b_interlaced );
- twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, b_interlaced );
+ else
+ h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
+}
+
+static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff )
+{
+ /* In MBAFF we store the last two rows in intra_border_backup[0] and [1].
+ * For progressive mbs this is the bottom two rows, and for interlaced the
+ * bottom row of each field. We also store samples needed for the next
+ * mbpair in intra_border_backup[2]. */
+ int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
+ memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
+ if( b_mbaff )
+ {
+ if( mb_y&1 )
+ {
+ int backup_src = (MB_INTERLACED ? 7 : 14) * FDEC_STRIDE;
+ backup_dst = MB_INTERLACED ? 2 : 0;
+ memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+backup_src, 16*sizeof(pixel) );
+ backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) );
+ }
}
else
{
- h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
- twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, b_interlaced );
+ /* In progressive we update intra_border_backup in-place, so the topleft neighbor will
+ * no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */
+ h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15];
+ h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+7];
+ h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+7];
}
}
@@ -1120,13 +1532,15 @@
int8_t *i4x4 = h->mb.intra4x4_pred_mode[i_mb_xy];
uint8_t *nnz = h->mb.non_zero_count[i_mb_xy];
- if( h->mb.b_interlaced )
+ if( SLICE_MBAFF )
{
+ x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 1 );
x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 1 );
x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1 );
}
else
{
+ x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 0 );
x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0 );
x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0 );
}
@@ -1285,42 +1699,43 @@
void x264_macroblock_bipred_init( x264_t *h )
{
- for( int field = 0; field <= h->sh.b_mbaff; field++ )
- for( int i_ref0 = 0; i_ref0 < (h->i_ref[0]<<h->sh.b_mbaff); i_ref0++ )
- {
- x264_frame_t *l0 = h->fref[0][i_ref0>>h->sh.b_mbaff];
- int poc0 = l0->i_poc + l0->i_delta_poc[field^(i_ref0&1)];
- for( int i_ref1 = 0; i_ref1 < (h->i_ref[1]<<h->sh.b_mbaff); i_ref1++ )
- {
- int dist_scale_factor;
- x264_frame_t *l1 = h->fref[1][i_ref1>>h->sh.b_mbaff];
- int poc1 = l1->i_poc + l1->i_delta_poc[field^(i_ref1&1)];
- int cur_poc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
- int td = x264_clip3( poc1 - poc0, -128, 127 );
- if( td == 0 /* || pic0 is a long-term ref */ )
- dist_scale_factor = 256;
- else
- {
- int tb = x264_clip3( cur_poc - poc0, -128, 127 );
- int tx = (16384 + (abs(td) >> 1)) / td;
- dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
- }
-
- h->mb.dist_scale_factor_buf[field][i_ref0][i_ref1] = dist_scale_factor;
-
- dist_scale_factor >>= 2;
- if( h->param.analyse.b_weighted_bipred
- && dist_scale_factor >= -64
- && dist_scale_factor <= 128 )
+ for( int mbfield = 0; mbfield <= SLICE_MBAFF; mbfield++ )
+ for( int field = 0; field <= SLICE_MBAFF; field++ )
+ for( int i_ref0 = 0; i_ref0 < (h->i_ref[0]<<mbfield); i_ref0++ )
+ {
+ x264_frame_t *l0 = h->fref[0][i_ref0>>mbfield];
+ int poc0 = l0->i_poc + mbfield*l0->i_delta_poc[field^(i_ref0&1)];
+ for( int i_ref1 = 0; i_ref1 < (h->i_ref[1]<<mbfield); i_ref1++ )
{
- h->mb.bipred_weight_buf[field][i_ref0][i_ref1] = 64 - dist_scale_factor;
- // ssse3 implementation of biweight doesn't support the extrema.
- // if we ever generate them, we'll have to drop that optimization.
- assert( dist_scale_factor >= -63 && dist_scale_factor <= 127 );
+ int dist_scale_factor;
+ x264_frame_t *l1 = h->fref[1][i_ref1>>mbfield];
+ int cur_poc = h->fdec->i_poc + mbfield*h->fdec->i_delta_poc[field];
+ int poc1 = l1->i_poc + mbfield*l1->i_delta_poc[field^(i_ref1&1)];
+ int td = x264_clip3( poc1 - poc0, -128, 127 );
+ if( td == 0 /* || pic0 is a long-term ref */ )
+ dist_scale_factor = 256;
+ else
+ {
+ int tb = x264_clip3( cur_poc - poc0, -128, 127 );
+ int tx = (16384 + (abs(td) >> 1)) / td;
+ dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
+ }
+
+ h->mb.dist_scale_factor_buf[mbfield][field][i_ref0][i_ref1] = dist_scale_factor;
+
+ dist_scale_factor >>= 2;
+ if( h->param.analyse.b_weighted_bipred
+ && dist_scale_factor >= -64
+ && dist_scale_factor <= 128 )
+ {
+ h->mb.bipred_weight_buf[mbfield][field][i_ref0][i_ref1] = 64 - dist_scale_factor;
+ // ssse3 implementation of biweight doesn't support the extrema.
+ // if we ever generate them, we'll have to drop that optimization.
+ assert( dist_scale_factor >= -63 && dist_scale_factor <= 127 );
+ }
+ else
+ h->mb.bipred_weight_buf[mbfield][field][i_ref0][i_ref1] = 32;
}
- else
- h->mb.bipred_weight_buf[field][i_ref0][i_ref1] = 32;
}
- }
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/macroblock.h
^
|
@@ -290,8 +290,10 @@
void x264_macroblock_slice_init( x264_t *h );
void x264_macroblock_thread_init( x264_t *h );
-void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y );
-void x264_macroblock_cache_load_deblock( x264_t *h );
+void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_interlaced );
+void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y );
+void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y );
+void x264_macroblock_deblock_strength( x264_t *h );
void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y );
void x264_macroblock_cache_save( x264_t *h );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/mc.c
^
|
@@ -511,18 +511,17 @@
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
- const int b_interlaced = h->sh.b_mbaff;
- const int stride = frame->i_stride[0] << b_interlaced;
+ const int b_interlaced = PARAM_INTERLACED;
+ int stride = frame->i_stride[0];
const int width = frame->i_width[0];
- int start = (mb_y*16 >> b_interlaced) - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
- int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
+ int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
+ int height = (b_end ? frame->i_lines[0] + 16*PARAM_INTERLACED : (mb_y+b_interlaced)*16) + 8;
int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
if( mb_y & b_interlaced )
return;
- for( int y = 0; y <= b_interlaced; y++, offs += frame->i_stride[0] )
- {
+ if( !b_interlaced || h->mb.b_adaptive_mbaff )
h->mc.hpel_filter(
frame->filtered[1] + offs,
frame->filtered[2] + offs,
@@ -530,6 +529,24 @@
frame->plane[0] + offs,
stride, width + 16, height - start,
h->scratch_buffer );
+
+ if( b_interlaced )
+ {
+ /* MC must happen between pixels in the same field. */
+ stride = frame->i_stride[0] << 1;
+ start = (mb_y*16 >> 1) - 8;
+ int height_fld = ((b_end ? frame->i_lines[0] : mb_y*16) >> 1) + 8;
+ offs = start*stride - 8;
+ for( int i = 0; i < 2; i++, offs += frame->i_stride[0] )
+ {
+ h->mc.hpel_filter(
+ frame->filtered_fld[1] + offs,
+ frame->filtered_fld[2] + offs,
+ frame->filtered_fld[3] + offs,
+ frame->plane_fld[0] + offs,
+ stride, width + 16, height_fld - start,
+ h->scratch_buffer );
+ }
}
/* generate integral image:
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/mvpred.c
^
|
@@ -38,12 +38,33 @@
int i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width];
int16_t *mv_c = h->mb.cache.mv[i_list][i8 - 8 + i_width];
+ // Partitions not yet reached in scan order are unavailable.
if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
{
i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
mv_c = h->mb.cache.mv[i_list][i8 - 8 - 1];
- }
+ if( SLICE_MBAFF
+ && h->mb.cache.ref[i_list][x264_scan8[0]-1] != -2
+ && MB_INTERLACED != h->mb.field[h->mb.i_mb_left_xy[0]] )
+ {
+ if( idx == 2 )
+ {
+ mv_c = h->mb.cache.topright_mv[i_list][0];
+ i_refc = h->mb.cache.topright_ref[i_list][0];
+ }
+ else if( idx == 8 )
+ {
+ mv_c = h->mb.cache.topright_mv[i_list][1];
+ i_refc = h->mb.cache.topright_ref[i_list][1];
+ }
+ else if( idx == 10 )
+ {
+ mv_c = h->mb.cache.topright_mv[i_list][2];
+ i_refc = h->mb.cache.topright_ref[i_list][2];
+ }
+ }
+ }
if( h->mb.i_partition == D_16x8 )
{
if( idx == 0 )
@@ -161,50 +182,95 @@
static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
{
- int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
- int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
- const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
- const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
+ int mb_x = h->mb.i_mb_x;
+ int mb_y = h->mb.i_mb_y;
+ int mb_xy = h->mb.i_mb_xy;
+ int type_col[2] = { h->fref[1][0]->mb_type[mb_xy], h->fref[1][0]->mb_type[mb_xy] };
+ int partition_col[2] = { h->fref[1][0]->mb_partition[mb_xy], h->fref[1][0]->mb_partition[mb_xy] };
+ int preshift = MB_INTERLACED;
+ int postshift = MB_INTERLACED;
+ int offset = 1;
+ int yshift = 1;
+ h->mb.i_partition = partition_col[0];
+ if( PARAM_INTERLACED && h->fref[1][0]->field[mb_xy] != MB_INTERLACED )
+ {
+ if( MB_INTERLACED )
+ {
+ mb_y = h->mb.i_mb_y&~1;
+ mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
+ type_col[0] = h->fref[1][0]->mb_type[mb_xy];
+ type_col[1] = h->fref[1][0]->mb_type[mb_xy + h->mb.i_mb_stride];
+ partition_col[0] = h->fref[1][0]->mb_partition[mb_xy];
+ partition_col[1] = h->fref[1][0]->mb_partition[mb_xy + h->mb.i_mb_stride];
+ preshift = 0;
+ yshift = 0;
+
+ if( (IS_INTRA(type_col[0]) || partition_col[0] == D_16x16) &&
+ (IS_INTRA(type_col[1]) || partition_col[1] == D_16x16) &&
+ partition_col[0] != D_8x8 )
+ h->mb.i_partition = D_16x8;
+ else
+ h->mb.i_partition = D_8x8;
+ }
+ else
+ {
+ int cur_poc = h->fdec->i_poc + h->fdec->i_delta_poc[MB_INTERLACED&h->mb.i_mb_y&1];
+ int col_parity = abs(h->fref[1][0]->i_poc + h->fref[1][0]->i_delta_poc[0] - cur_poc)
+ >= abs(h->fref[1][0]->i_poc + h->fref[1][0]->i_delta_poc[1] - cur_poc);
+ mb_y = (h->mb.i_mb_y&~1) + col_parity;
+ mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
+ type_col[0] = type_col[1] = h->fref[1][0]->mb_type[mb_xy];
+ partition_col[0] = partition_col[1] = h->fref[1][0]->mb_partition[mb_xy];
+ preshift = 1;
+ yshift = 2;
+ h->mb.i_partition = partition_col[0];
+ }
+ offset = 0;
+ }
+ int i_mb_4x4 = 16 * h->mb.i_mb_stride * mb_y + 4 * mb_x;
+ int i_mb_8x8 = 4 * h->mb.i_mb_stride * mb_y + 2 * mb_x;
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
- h->mb.i_partition = partition_col;
-
- if( IS_INTRA( type_col ) )
- {
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
- return 1;
- }
-
/* Don't do any checks other than the ones we have to, based
* on the size of the colocated partitions.
* Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
- int max_i8 = (D_16x16 - partition_col) + 1;
- int step = (partition_col == D_16x8) + 1;
- int width = 4 >> ((D_16x16 - partition_col)&1);
- int height = 4 >> ((D_16x16 - partition_col)>>1);
-
+ int max_i8 = (D_16x16 - h->mb.i_partition) + 1;
+ int step = (h->mb.i_partition == D_16x8) + 1;
+ int width = 4 >> ((D_16x16 - h->mb.i_partition)&1);
+ int height = 4 >> ((D_16x16 - h->mb.i_partition)>>1);
for( int i8 = 0; i8 < max_i8; i8 += step )
{
int x8 = i8&1;
int y8 = i8>>1;
- int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
+ int ypart = (SLICE_MBAFF && h->fref[1][0]->field[mb_xy] != MB_INTERLACED) ?
+ MB_INTERLACED ? y8*6 : 2*(h->mb.i_mb_y&1) + y8 :
+ 3*y8;
+
+ if( IS_INTRA( type_col[y8] ) )
+ {
+ x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, 0 );
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, 0 );
+ continue;
+ }
+
+ int i_part_8x8 = i_mb_8x8 + x8 + (ypart>>1) * h->mb.i_b8_stride;
int i_ref1_ref = h->fref[1][0]->ref[0][i_part_8x8];
- int i_ref = (map_col_to_list0(i_ref1_ref>>h->sh.b_mbaff) << h->sh.b_mbaff) + (i_ref1_ref&h->sh.b_mbaff);
+ int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) << postshift) + (offset&i_ref1_ref&MB_INTERLACED);
if( i_ref >= 0 )
{
int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
- int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
+ int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + ypart * h->mb.i_b4_stride];
+ int16_t mv_y = (mv_col[1]<<yshift)/2;
int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
- int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
- if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
+ int l0y = ( dist_scale_factor * mv_y + 128 ) >> 8;
+ if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_y > h->mb.mv_max_spel[1]) )
return 0;
x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, i_ref );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, pack16to32_mask(l0x, l0y) );
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_y) );
}
else
{
@@ -220,19 +286,10 @@
return 1;
}
-static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
+static ALWAYS_INLINE int x264_mb_predict_mv_direct16x16_spatial( x264_t *h, int b_interlaced )
{
int8_t ref[2];
ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
- const int8_t *l1ref0 = &h->fref[1][0]->ref[0][h->mb.i_b8_xy];
- const int8_t *l1ref1 = &h->fref[1][0]->ref[1][h->mb.i_b8_xy];
- const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref[1][0]->mv[0][h->mb.i_b4_xy],
- (const int16_t (*)[2]) &h->fref[1][0]->mv[1][h->mb.i_b4_xy] };
- const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
- const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
-
- h->mb.i_partition = partition_col;
-
for( int i_list = 0; i_list < 2; i_list++ )
{
int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
@@ -277,6 +334,50 @@
ref[i_list] = i_ref;
}
+ int mb_x = h->mb.i_mb_x;
+ int mb_y = h->mb.i_mb_y;
+ int mb_xy = h->mb.i_mb_xy;
+ int type_col[2] = { h->fref[1][0]->mb_type[mb_xy], h->fref[1][0]->mb_type[mb_xy] };
+ int partition_col[2] = { h->fref[1][0]->mb_partition[mb_xy], h->fref[1][0]->mb_partition[mb_xy] };
+ h->mb.i_partition = partition_col[0];
+ if( b_interlaced && h->fref[1][0]->field[mb_xy] != MB_INTERLACED )
+ {
+ if( MB_INTERLACED )
+ {
+ mb_y = h->mb.i_mb_y&~1;
+ mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
+ type_col[0] = h->fref[1][0]->mb_type[mb_xy];
+ type_col[1] = h->fref[1][0]->mb_type[mb_xy + h->mb.i_mb_stride];
+ partition_col[0] = h->fref[1][0]->mb_partition[mb_xy];
+ partition_col[1] = h->fref[1][0]->mb_partition[mb_xy + h->mb.i_mb_stride];
+
+ if( (IS_INTRA(type_col[0]) || partition_col[0] == D_16x16) &&
+ (IS_INTRA(type_col[1]) || partition_col[1] == D_16x16) &&
+ partition_col[0] != D_8x8 )
+ h->mb.i_partition = D_16x8;
+ else
+ h->mb.i_partition = D_8x8;
+ }
+ else
+ {
+ int cur_poc = h->fdec->i_poc + h->fdec->i_delta_poc[MB_INTERLACED&h->mb.i_mb_y&1];
+ int col_parity = abs(h->fref[1][0]->i_poc + h->fref[1][0]->i_delta_poc[0] - cur_poc)
+ >= abs(h->fref[1][0]->i_poc + h->fref[1][0]->i_delta_poc[1] - cur_poc);
+ mb_y = (h->mb.i_mb_y&~1) + col_parity;
+ mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
+ type_col[0] = type_col[1] = h->fref[1][0]->mb_type[mb_xy];
+ partition_col[0] = partition_col[1] = h->fref[1][0]->mb_partition[mb_xy];
+ h->mb.i_partition = partition_col[0];
+ }
+ }
+ int i_mb_4x4 = b_interlaced ? 4 * (h->mb.i_b4_stride*mb_y + mb_x) : h->mb.i_b4_xy ;
+ int i_mb_8x8 = b_interlaced ? 2 * (h->mb.i_b8_stride*mb_y + mb_x) : h->mb.i_b8_xy ;
+
+ int8_t *l1ref0 = &h->fref[1][0]->ref[0][i_mb_8x8];
+ int8_t *l1ref1 = &h->fref[1][0]->ref[1][i_mb_8x8];
+ int16_t (*l1mv[2])[2] = { (int16_t (*)[2]) &h->fref[1][0]->mv[0][i_mb_4x4],
+ (int16_t (*)[2]) &h->fref[1][0]->mv[1][i_mb_4x4] };
+
if( (M16( ref ) & 0x8080) == 0x8080 ) /* if( ref[0] < 0 && ref[1] < 0 ) */
{
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
@@ -296,24 +397,31 @@
return 0;
}
- if( !M64( mv ) || IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
+ if( !M64( mv ) || (!b_interlaced && IS_INTRA( type_col[0] )) || (ref[0]&&ref[1]) )
return 1;
/* Don't do any checks other than the ones we have to, based
* on the size of the colocated partitions.
* Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
- int max_i8 = (D_16x16 - partition_col) + 1;
- int step = (partition_col == D_16x8) + 1;
- int width = 4 >> ((D_16x16 - partition_col)&1);
- int height = 4 >> ((D_16x16 - partition_col)>>1);
+ int max_i8 = (D_16x16 - h->mb.i_partition) + 1;
+ int step = (h->mb.i_partition == D_16x8) + 1;
+ int width = 4 >> ((D_16x16 - h->mb.i_partition)&1);
+ int height = 4 >> ((D_16x16 - h->mb.i_partition)>>1);
/* col_zero_flag */
for( int i8 = 0; i8 < max_i8; i8 += step )
{
const int x8 = i8&1;
const int y8 = i8>>1;
- const int o8 = x8 + y8 * h->mb.i_b8_stride;
- const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
+ int ypart = (b_interlaced && h->fref[1][0]->field[mb_xy] != MB_INTERLACED) ?
+ MB_INTERLACED ? y8*6 : 2*(h->mb.i_mb_y&1) + y8 :
+ 3*y8;
+ int o8 = x8 + (ypart>>1) * h->mb.i_b8_stride;
+ int o4 = 3*x8 + ypart * h->mb.i_b4_stride;
+
+ if( b_interlaced && IS_INTRA( type_col[y8] ) )
+ continue;
+
int idx;
if( l1ref0[o8] == 0 )
idx = 0;
@@ -332,13 +440,29 @@
return 1;
}
+
+static int x264_mb_predict_mv_direct16x16_spatial_interlaced( x264_t *h )
+{
+ return x264_mb_predict_mv_direct16x16_spatial( h, 1 );
+}
+
+static int x264_mb_predict_mv_direct16x16_spatial_progressive( x264_t *h )
+{
+ return x264_mb_predict_mv_direct16x16_spatial( h, 0 );
+}
+
int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
{
int b_available;
if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
return 0;
else if( h->sh.b_direct_spatial_mv_pred )
- b_available = x264_mb_predict_mv_direct16x16_spatial( h );
+ {
+ if( SLICE_MBAFF )
+ b_available = x264_mb_predict_mv_direct16x16_spatial_interlaced( h );
+ else
+ b_available = x264_mb_predict_mv_direct16x16_spatial_progressive( h );
+ }
else
b_available = x264_mb_predict_mv_direct16x16_temporal( h );
@@ -426,7 +550,7 @@
}
/* spatial predictors */
- SET_MVP( mvr[h->mb.i_mb_left_xy] );
+ SET_MVP( mvr[h->mb.i_mb_left_xy[0]] );
SET_MVP( mvr[h->mb.i_mb_top_xy] );
SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
SET_MVP( mvr[h->mb.i_mb_topright_xy] );
@@ -438,13 +562,13 @@
x264_frame_t *l0 = h->fref[0][0];
int field = h->mb.i_mb_y&1;
int curpoc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
- int refpoc = h->fref[i_list][i_ref>>h->sh.b_mbaff]->i_poc;
+ int refpoc = h->fref[i_list][i_ref>>SLICE_MBAFF]->i_poc;
refpoc += l0->i_delta_poc[field^(i_ref&1)];
#define SET_TMVP( dx, dy ) \
{ \
int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
- int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field]; \
+ int scale = (curpoc - refpoc) * l0->inv_ref_poc[MB_INTERLACED&field]; \
mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8; \
mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8; \
i++; \
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/osdep.c
^
|
@@ -24,16 +24,16 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#ifndef __MINGW32__
-#include <sys/time.h>
-#else
+#include "common.h"
+
+#if SYS_WINDOWS
#include <sys/types.h>
#include <sys/timeb.h>
+#else
+#include <sys/time.h>
#endif
#include <time.h>
-#include "common.h"
-
#if PTW32_STATIC_LIB
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
@@ -43,14 +43,14 @@
int64_t x264_mdate( void )
{
-#ifndef __MINGW32__
- struct timeval tv_date;
- gettimeofday( &tv_date, NULL );
- return (int64_t)tv_date.tv_sec * 1000000 + (int64_t)tv_date.tv_usec;
-#else
+#if SYS_WINDOWS
struct timeb tb;
ftime( &tb );
return ((int64_t)tb.time * 1000 + (int64_t)tb.millitm) * 1000;
+#else
+ struct timeval tv_date;
+ gettimeofday( &tv_date, NULL );
+ return (int64_t)tv_date.tv_sec * 1000000 + (int64_t)tv_date.tv_usec;
#endif
}
@@ -89,3 +89,35 @@
return 0;
}
#endif
+
+#ifdef __INTEL_COMPILER
+/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of
+ * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30)
+ * adapted to x264's cpu schema. */
+
+// Global variable indicating cpu
+int __intel_cpu_indicator = 0;
+// CPU dispatcher function
+void __intel_cpu_indicator_init( void )
+{
+ unsigned int cpu = x264_cpu_detect();
+ if( cpu&X264_CPU_AVX )
+ __intel_cpu_indicator = 0x20000;
+ else if( cpu&X264_CPU_SSE42 )
+ __intel_cpu_indicator = 0x8000;
+ else if( cpu&X264_CPU_SSE4 )
+ __intel_cpu_indicator = 0x2000;
+ else if( cpu&X264_CPU_SSSE3 )
+ __intel_cpu_indicator = 0x1000;
+ else if( cpu&X264_CPU_SSE3 )
+ __intel_cpu_indicator = 0x800;
+ else if( cpu&X264_CPU_SSE2 && !(cpu&X264_CPU_SSE2_IS_SLOW) )
+ __intel_cpu_indicator = 0x200;
+ else if( cpu&X264_CPU_SSE )
+ __intel_cpu_indicator = 0x80;
+ else if( cpu&X264_CPU_MMXEXT )
+ __intel_cpu_indicator = 8;
+ else
+ __intel_cpu_indicator = 1;
+}
+#endif
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/osdep.h
^
|
@@ -50,6 +50,25 @@
#include <fcntl.h> // _O_BINARY
#endif
+#ifdef __ICL
+#define inline __inline
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define snprintf _snprintf
+#define strtok_r strtok_s
+#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+#ifdef __INTEL_COMPILER
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+
+#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
+#define HAVE_X86_INLINE_ASM 1
+#endif
+
#if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
#define isfinite finite
#endif
@@ -60,7 +79,11 @@
#endif
#endif
+#ifdef __ICL
+#define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var
+#else
#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
+#endif
#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
#define ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
@@ -99,9 +122,14 @@
#define x264_constant_p(x) __builtin_constant_p(x)
#define x264_nonconstant_p(x) (!__builtin_constant_p(x))
#else
-#define UNUSED
+#ifdef __ICL
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#else
#define ALWAYS_INLINE inline
#define NOINLINE
+#endif
+#define UNUSED
#define MAY_ALIAS
#define x264_constant_p(x) 0
#define x264_nonconstant_p(x) 0
@@ -179,19 +207,13 @@
#define asm __asm__
-#if !defined(_WIN64) && !defined(__LP64__)
-#if defined(__INTEL_COMPILER)
-#define BROKEN_STACK_ALIGNMENT 1 /* define it if stack is not mod16 */
-#endif
-#endif
-
#if WORDS_BIGENDIAN
#define endian_fix(x) (x)
#define endian_fix64(x) (x)
#define endian_fix32(x) (x)
#define endian_fix16(x) (x)
#else
-#if defined(__GNUC__) && HAVE_MMX
+#if HAVE_X86_INLINE_ASM && HAVE_MMX
static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
{
asm("bswap %0":"+r"(x));
@@ -209,7 +231,7 @@
return (x<<24) + ((x<<8)&0xff0000) + ((x>>8)&0xff00) + (x>>24);
}
#endif
-#if defined(__GNUC__) && ARCH_X86_64
+#if HAVE_X86_INLINE_ASM && ARCH_X86_64
static ALWAYS_INLINE uint64_t endian_fix64( uint64_t x )
{
asm("bswap %0":"+r"(x));
@@ -260,7 +282,7 @@
}
#endif
-#if defined(__GNUC__) && HAVE_MMX
+#if HAVE_X86_INLINE_ASM && HAVE_MMX
/* Don't use __builtin_prefetch; even as recent as 4.3.4, GCC seems incapable of
* using complex address modes properly unless we use inline asm. */
static ALWAYS_INLINE void x264_prefetch( void *p )
@@ -277,7 +299,7 @@
#endif
#if HAVE_POSIXTHREAD
-#if SYS_MINGW
+#if SYS_WINDOWS
#define x264_lower_thread_priority(p)\
{\
x264_pthread_t handle = pthread_self();\
@@ -290,7 +312,7 @@
#else
#include <unistd.h>
#define x264_lower_thread_priority(p) { UNUSED int nice_ret = nice(p); }
-#endif /* SYS_MINGW */
+#endif /* SYS_WINDOWS */
#elif HAVE_WIN32THREAD
#define x264_lower_thread_priority(p) SetThreadPriority( GetCurrentThread(), X264_MAX( -2, -p ) )
#else
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/pixel.c
^
|
@@ -641,6 +641,36 @@
return ssim;
}
+int pixel_vsad( pixel *src, int stride, int height )
+{
+ int score = 0;
+ for( int i = 1; i < height; i++, src += stride )
+ for( int j = 0; j < 16; j++ )
+ score += abs(src[j] - src[j+stride]);
+ return score;
+}
+
+int x264_field_vsad( x264_t *h, int mb_x, int mb_y )
+{
+ int score_field, score_frame;
+ int stride = h->fenc->i_stride[0];
+ int mb_stride = h->mb.i_mb_stride;
+ pixel *fenc = h->fenc->plane[0] + 16 * (mb_x + mb_y * stride);
+ int mb_xy = mb_x + mb_y*mb_stride;
+
+ /* We don't want to analyze pixels outside the frame, as it gives inaccurate results. */
+ int mbpair_height = X264_MIN( h->param.i_height - mb_y * 16, 32 );
+ score_frame = h->pixf.vsad( fenc, stride, mbpair_height );
+ score_field = h->pixf.vsad( fenc, stride*2, mbpair_height >> 1 );
+ score_field += h->pixf.vsad( fenc+stride, stride*2, mbpair_height >> 1 );
+
+ if( mb_x > 0 )
+ score_field += 512 - h->mb.field[mb_xy -1]*1024;
+ if( mb_y > 0 )
+ score_field += 512 - h->mb.field[mb_xy-mb_stride]*1024;
+
+ return (score_field < score_frame);
+}
/****************************************************************************
* successive elimination
@@ -746,6 +776,7 @@
pixf->ssim_4x4x2_core = ssim_4x4x2_core;
pixf->ssim_end4 = ssim_end4;
pixf->var2_8x8 = pixel_var2_8x8;
+ pixf->vsad = pixel_vsad;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4;
pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4;
@@ -873,6 +904,7 @@
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_mmxext;
pixf->var2_8x8 = x264_pixel_var2_8x8_mmxext;
+ pixf->vsad = x264_pixel_vsad_mmxext;
if( cpu&X264_CPU_CACHELINE_32 )
{
@@ -921,6 +953,7 @@
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
pixf->var2_8x8 = x264_pixel_var2_8x8_sse2;
+ pixf->vsad = x264_pixel_vsad_sse2;
}
if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/pixel.h
^
|
@@ -47,10 +47,12 @@
PIXEL_2x2 = 9,
};
-static const struct {
+static const struct
+{
int w;
int h;
-} x264_pixel_size[7] = {
+} x264_pixel_size[7] =
+{
{ 16, 16 },
{ 16, 8 }, { 8, 16 },
{ 8, 8 },
@@ -58,7 +60,8 @@
{ 4, 4 }
};
-static const uint8_t x264_size2pixel[5][5] = {
+static const uint8_t x264_size2pixel[5][5] =
+{
{ 0, },
{ 0, PIXEL_4x4, PIXEL_8x4, 0, 0 },
{ 0, PIXEL_4x8, PIXEL_8x8, 0, PIXEL_16x8 },
@@ -79,6 +82,7 @@
x264_pixel_cmp_x3_t fpelcmp_x3[7];
x264_pixel_cmp_x4_t fpelcmp_x4[7];
x264_pixel_cmp_t sad_aligned[7]; /* Aligned SAD for mbcmp */
+ int (*vsad)( pixel *, int, int );
int (*var2_8x8)( pixel *, int, pixel *, int, int * );
uint64_t (*var[4])( pixel *pix, int stride );
@@ -122,5 +126,6 @@
void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v );
uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height );
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf );
+int x264_field_vsad( x264_t *h, int mb_x, int mb_y );
#endif
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/ppc/mc.c
^
|
@@ -856,6 +856,318 @@
dstc += dst_stride;
}
}
+
+static void mc_weight_w2_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+ const x264_weight_t *weight, int i_height )
+{
+ LOAD_ZERO;
+ PREP_LOAD;
+ PREP_LOAD_SRC( src );
+ vec_u8_t srcv;
+ vec_s16_t weightv;
+ vec_s16_t scalev, offsetv, denomv, roundv;
+ vec_s16_u loadv;
+
+ int denom = weight->i_denom;
+
+ loadv.s[0] = weight->i_scale;
+ scalev = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = weight->i_offset;
+ offsetv = vec_splat( loadv.v, 0 );
+
+ if( denom >= 1 )
+ {
+ loadv.s[0] = denom;
+ denomv = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = 1<<(denom - 1);
+ roundv = vec_splat( loadv.v, 0 );
+
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 2, vec_u8_t, src );
+ weightv = vec_u8_to_s16( srcv );
+
+ weightv = vec_mladd( weightv, scalev, roundv );
+ weightv = vec_sra( weightv, (vec_u16_t)denomv );
+ weightv = vec_add( weightv, offsetv );
+
+ srcv = vec_packsu( weightv, zero_s16v );
+ vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst );
+ }
+ }
+ else
+ {
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 2, vec_u8_t, src );
+ weightv = vec_u8_to_s16( srcv );
+
+ weightv = vec_mladd( weightv, scalev, offsetv );
+
+ srcv = vec_packsu( weightv, zero_s16v );
+ vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst );
+ }
+ }
+}
+static void mc_weight_w4_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+ const x264_weight_t *weight, int i_height )
+{
+ LOAD_ZERO;
+ PREP_LOAD;
+ PREP_LOAD_SRC( src );
+ vec_u8_t srcv;
+ vec_s16_t weightv;
+ vec_s16_t scalev, offsetv, denomv, roundv;
+ vec_s16_u loadv;
+
+ int denom = weight->i_denom;
+
+ loadv.s[0] = weight->i_scale;
+ scalev = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = weight->i_offset;
+ offsetv = vec_splat( loadv.v, 0 );
+
+ if( denom >= 1 )
+ {
+ loadv.s[0] = denom;
+ denomv = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = 1<<(denom - 1);
+ roundv = vec_splat( loadv.v, 0 );
+
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 4, vec_u8_t, src );
+ weightv = vec_u8_to_s16( srcv );
+
+ weightv = vec_mladd( weightv, scalev, roundv );
+ weightv = vec_sra( weightv, (vec_u16_t)denomv );
+ weightv = vec_add( weightv, offsetv );
+
+ srcv = vec_packsu( weightv, zero_s16v );
+ vec_ste( vec_splat( (vec_u32_t)srcv, 0 ), 0, (uint32_t*)dst );
+ }
+ }
+ else
+ {
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 4, vec_u8_t, src );
+ weightv = vec_u8_to_s16( srcv );
+
+ weightv = vec_mladd( weightv, scalev, offsetv );
+
+ srcv = vec_packsu( weightv, zero_s16v );
+ vec_ste( vec_splat( (vec_u32_t)srcv, 0 ), 0, (uint32_t*)dst );
+ }
+ }
+}
+static void mc_weight_w8_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+ const x264_weight_t *weight, int i_height )
+{
+ LOAD_ZERO;
+ PREP_LOAD;
+ PREP_LOAD_SRC( src );
+ PREP_STORE8;
+ vec_u8_t srcv;
+ vec_s16_t weightv;
+ vec_s16_t scalev, offsetv, denomv, roundv;
+ vec_s16_u loadv;
+
+ int denom = weight->i_denom;
+
+ loadv.s[0] = weight->i_scale;
+ scalev = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = weight->i_offset;
+ offsetv = vec_splat( loadv.v, 0 );
+
+ if( denom >= 1 )
+ {
+ loadv.s[0] = denom;
+ denomv = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = 1<<(denom - 1);
+ roundv = vec_splat( loadv.v, 0 );
+
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 8, vec_u8_t, src );
+ weightv = vec_u8_to_s16( srcv );
+
+ weightv = vec_mladd( weightv, scalev, roundv );
+ weightv = vec_sra( weightv, (vec_u16_t)denomv );
+ weightv = vec_add( weightv, offsetv );
+
+ srcv = vec_packsu( weightv, zero_s16v );
+ VEC_STORE8( srcv, dst );
+ }
+ }
+ else
+ {
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 8, vec_u8_t, src );
+ weightv = vec_u8_to_s16( srcv );
+
+ weightv = vec_mladd( weightv, scalev, offsetv );
+
+ srcv = vec_packsu( weightv, zero_s16v );
+ VEC_STORE8( srcv, dst );
+ }
+ }
+}
+static void mc_weight_w16_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+ const x264_weight_t *weight, int i_height )
+{
+ LOAD_ZERO;
+ PREP_LOAD;
+ PREP_LOAD_SRC( src );
+ vec_u8_t srcv;
+ vec_s16_t weight_lv, weight_hv;
+ vec_s16_t scalev, offsetv, denomv, roundv;
+ vec_s16_u loadv;
+
+ int denom = weight->i_denom;
+
+ loadv.s[0] = weight->i_scale;
+ scalev = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = weight->i_offset;
+ offsetv = vec_splat( loadv.v, 0 );
+
+ if( denom >= 1 )
+ {
+ loadv.s[0] = denom;
+ denomv = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = 1<<(denom - 1);
+ roundv = vec_splat( loadv.v, 0 );
+
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 16, vec_u8_t, src );
+ weight_hv = vec_u8_to_s16_h( srcv );
+ weight_lv = vec_u8_to_s16_l( srcv );
+
+ weight_hv = vec_mladd( weight_hv, scalev, roundv );
+ weight_lv = vec_mladd( weight_lv, scalev, roundv );
+ weight_hv = vec_sra( weight_hv, (vec_u16_t)denomv );
+ weight_lv = vec_sra( weight_lv, (vec_u16_t)denomv );
+ weight_hv = vec_add( weight_hv, offsetv );
+ weight_lv = vec_add( weight_lv, offsetv );
+
+ srcv = vec_packsu( weight_hv, weight_lv );
+ vec_st( srcv, 0, dst );
+ }
+ }
+ else
+ {
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ VEC_LOAD( src, srcv, 16, vec_u8_t, src );
+ weight_hv = vec_u8_to_s16_h( srcv );
+ weight_lv = vec_u8_to_s16_l( srcv );
+
+ weight_hv = vec_mladd( weight_hv, scalev, offsetv );
+ weight_lv = vec_mladd( weight_lv, scalev, offsetv );
+
+ srcv = vec_packsu( weight_hv, weight_lv );
+ vec_st( srcv, 0, dst );
+ }
+ }
+}
+static void mc_weight_w20_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+ const x264_weight_t *weight, int i_height )
+{
+ LOAD_ZERO;
+ PREP_LOAD_SRC( src );
+ vec_u8_t src_1v, src_2v, src_3v;
+ vec_s16_t weight_lv, weight_hv, weight_3v;
+ vec_s16_t scalev, offsetv, denomv, roundv;
+ vec_s16_u loadv;
+
+ int denom = weight->i_denom;
+
+ loadv.s[0] = weight->i_scale;
+ scalev = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = weight->i_offset;
+ offsetv = vec_splat( loadv.v, 0 );
+
+ if( denom >= 1 )
+ {
+ loadv.s[0] = denom;
+ denomv = vec_splat( loadv.v, 0 );
+
+ loadv.s[0] = 1<<(denom - 1);
+ roundv = vec_splat( loadv.v, 0 );
+
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ src_1v = vec_ld( 0, src );
+ src_2v = vec_ld( 16, src );
+ src_3v = vec_ld( 19, src );
+ src_1v = vec_perm( src_1v, src_2v, _src_ );
+ src_3v = vec_perm( src_2v, src_3v, _src_ );
+ weight_hv = vec_u8_to_s16_h( src_1v );
+ weight_lv = vec_u8_to_s16_l( src_1v );
+ weight_3v = vec_u8_to_s16_h( src_3v );
+
+ weight_hv = vec_mladd( weight_hv, scalev, roundv );
+ weight_lv = vec_mladd( weight_lv, scalev, roundv );
+ weight_3v = vec_mladd( weight_3v, scalev, roundv );
+ weight_hv = vec_sra( weight_hv, (vec_u16_t)denomv );
+ weight_lv = vec_sra( weight_lv, (vec_u16_t)denomv );
+ weight_3v = vec_sra( weight_3v, (vec_u16_t)denomv );
+ weight_hv = vec_add( weight_hv, offsetv );
+ weight_lv = vec_add( weight_lv, offsetv );
+ weight_3v = vec_add( weight_3v, offsetv );
+
+ src_1v = vec_packsu( weight_hv, weight_lv );
+ src_3v = vec_packsu( weight_3v, zero_s16v );
+ vec_st( src_1v, 0, dst );
+ vec_ste( (vec_u32_t)src_3v, 16, (uint32_t*)dst );
+ }
+ }
+ else
+ {
+ for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
+ {
+ src_1v = vec_ld( 0, src );
+ src_2v = vec_ld( 16, src );
+ src_3v = vec_ld( 19, src );
+ src_1v = vec_perm( src_1v, src_2v, _src_ );
+ src_3v = vec_perm( src_2v, src_3v, _src_ );
+ weight_hv = vec_u8_to_s16_h( src_1v );
+ weight_lv = vec_u8_to_s16_l( src_1v );
+ weight_3v = vec_u8_to_s16_h( src_3v );
+
+ weight_hv = vec_mladd( weight_hv, scalev, offsetv );
+ weight_lv = vec_mladd( weight_lv, scalev, offsetv );
+ weight_3v = vec_mladd( weight_3v, scalev, offsetv );
+
+ src_1v = vec_packsu( weight_hv, weight_lv );
+ src_3v = vec_packsu( weight_3v, zero_s16v );
+ vec_st( src_1v, 0, dst );
+ vec_ste( (vec_u32_t)src_3v, 16, (uint32_t*)dst );
+ }
+ }
+}
+
+static weight_fn_t x264_mc_weight_wtab_altivec[6] =
+{
+ mc_weight_w2_altivec,
+ mc_weight_w4_altivec,
+ mc_weight_w8_altivec,
+ mc_weight_w16_altivec,
+ mc_weight_w16_altivec,
+ mc_weight_w20_altivec,
+};
+
#endif // !HIGH_BIT_DEPTH
void x264_mc_altivec_init( x264_mc_functions_t *pf )
@@ -870,5 +1182,7 @@
pf->hpel_filter = x264_hpel_filter_altivec;
pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
+
+ pf->weight = x264_mc_weight_wtab_altivec;
#endif // !HIGH_BIT_DEPTH
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/ppc/pixel.c
^
|
@@ -26,6 +26,7 @@
#include "common/common.h"
#include "ppccommon.h"
+#include "../predict.h"
#if !HIGH_BIT_DEPTH
/***********************************************************************
@@ -1983,6 +1984,61 @@
sums[0][3] = temp[0];
sums[1][3] = temp[1];
}
+
+#define SATD_X( size ) \
+static void pixel_satd_x3_##size##_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )\
+{\
+ scores[0] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix0, i_stride );\
+ scores[1] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix1, i_stride );\
+ scores[2] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix2, i_stride );\
+}\
+static void pixel_satd_x4_##size##_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )\
+{\
+ scores[0] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix0, i_stride );\
+ scores[1] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix1, i_stride );\
+ scores[2] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix2, i_stride );\
+ scores[3] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix3, i_stride );\
+}
+SATD_X( 16x16 )\
+SATD_X( 16x8 )\
+SATD_X( 8x16 )\
+SATD_X( 8x8 )\
+SATD_X( 8x4 )\
+SATD_X( 4x8 )\
+SATD_X( 4x4 )
+
+
+#define INTRA_MBCMP_8x8( mbcmp )\
+void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[33], int res[3] )\
+{\
+ ALIGNED_8( uint8_t pix[8*FDEC_STRIDE] );\
+ x264_predict_8x8_v_c( pix, edge );\
+ res[0] = pixel_##mbcmp##_8x8_altivec( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_8x8_h_c( pix, edge );\
+ res[1] = pixel_##mbcmp##_8x8_altivec( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_8x8_dc_c( pix, edge );\
+ res[2] = pixel_##mbcmp##_8x8_altivec( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
+}
+
+INTRA_MBCMP_8x8(sad)
+INTRA_MBCMP_8x8(sa8d)
+
+#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma )\
+void intra_##mbcmp##_x3_##size##x##size##chroma##_altivec( uint8_t *fenc, uint8_t *fdec, int res[3] )\
+{\
+ x264_predict_##size##x##size##chroma##_##pred1##_c( fdec );\
+ res[0] = pixel_##mbcmp##_##size##x##size##_altivec( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_##size##x##size##chroma##_##pred2##_c( fdec );\
+ res[1] = pixel_##mbcmp##_##size##x##size##_altivec( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_##size##x##size##chroma##_##pred3##_c( fdec );\
+ res[2] = pixel_##mbcmp##_##size##x##size##_altivec( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+}
+
+INTRA_MBCMP(satd, 4, v, h, dc, )
+INTRA_MBCMP(sad, 8, dc, h, v, c )
+INTRA_MBCMP(satd, 8, dc, h, v, c )
+INTRA_MBCMP(sad, 16, v, h, dc, )
+INTRA_MBCMP(satd, 16, v, h, dc, )
#endif // !HIGH_BIT_DEPTH
/****************************************************************************
@@ -2014,12 +2070,38 @@
pixf->satd[PIXEL_4x8] = pixel_satd_4x8_altivec;
pixf->satd[PIXEL_4x4] = pixel_satd_4x4_altivec;
+ pixf->satd_x3[PIXEL_16x16] = pixel_satd_x3_16x16_altivec;
+ pixf->satd_x3[PIXEL_8x16] = pixel_satd_x3_8x16_altivec;
+ pixf->satd_x3[PIXEL_16x8] = pixel_satd_x3_16x8_altivec;
+ pixf->satd_x3[PIXEL_8x8] = pixel_satd_x3_8x8_altivec;
+ pixf->satd_x3[PIXEL_8x4] = pixel_satd_x3_8x4_altivec;
+ pixf->satd_x3[PIXEL_4x8] = pixel_satd_x3_4x8_altivec;
+ pixf->satd_x3[PIXEL_4x4] = pixel_satd_x3_4x4_altivec;
+
+ pixf->satd_x4[PIXEL_16x16] = pixel_satd_x4_16x16_altivec;
+ pixf->satd_x4[PIXEL_8x16] = pixel_satd_x4_8x16_altivec;
+ pixf->satd_x4[PIXEL_16x8] = pixel_satd_x4_16x8_altivec;
+ pixf->satd_x4[PIXEL_8x8] = pixel_satd_x4_8x8_altivec;
+ pixf->satd_x4[PIXEL_8x4] = pixel_satd_x4_8x4_altivec;
+ pixf->satd_x4[PIXEL_4x8] = pixel_satd_x4_4x8_altivec;
+ pixf->satd_x4[PIXEL_4x4] = pixel_satd_x4_4x4_altivec;
+
+ pixf->intra_sad_x3_8x8 = intra_sad_x3_8x8_altivec;
+ pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c_altivec;
+ pixf->intra_sad_x3_16x16 = intra_sad_x3_16x16_altivec;
+
+ pixf->intra_satd_x3_4x4 = intra_satd_x3_4x4_altivec;
+ pixf->intra_satd_x3_8x8c = intra_satd_x3_8x8c_altivec;
+ pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16_altivec;
+
pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16_altivec;
pixf->ssd[PIXEL_8x8] = pixel_ssd_8x8_altivec;
pixf->sa8d[PIXEL_16x16] = pixel_sa8d_16x16_altivec;
pixf->sa8d[PIXEL_8x8] = pixel_sa8d_8x8_altivec;
+ pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8_altivec;
+
pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_altivec;
pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_altivec;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/quant.c
^
|
@@ -141,6 +141,66 @@
}
}
+static ALWAYS_INLINE void idct_dequant_2x2_dconly( dctcoef out[4], dctcoef dct[4], int dequant_mf )
+{
+ int d0 = dct[0] + dct[1];
+ int d1 = dct[2] + dct[3];
+ int d2 = dct[0] - dct[1];
+ int d3 = dct[2] - dct[3];
+ out[0] = (d0 + d1) * dequant_mf >> 5;
+ out[1] = (d0 - d1) * dequant_mf >> 5;
+ out[2] = (d2 + d3) * dequant_mf >> 5;
+ out[3] = (d2 - d3) * dequant_mf >> 5;
+}
+
+static ALWAYS_INLINE int idct_dequant_round_2x2_dc( dctcoef ref[4], dctcoef dct[4], int dequant_mf )
+{
+ dctcoef out[4];
+ idct_dequant_2x2_dconly( out, dct, dequant_mf );
+ return ((ref[0] ^ (out[0]+32))
+ | (ref[1] ^ (out[1]+32))
+ | (ref[2] ^ (out[2]+32))
+ | (ref[3] ^ (out[3]+32))) >> 6;
+}
+
+static int optimize_chroma_dc( dctcoef dct[4], int dequant_mf )
+{
+ /* dequant_mf = h->dequant4_mf[CQM_4IC + b_inter][i_qp%6][0] << i_qp/6, max 32*64 */
+ dctcoef dct_orig[4];
+ int coeff, nz;
+
+ idct_dequant_2x2_dconly( dct_orig, dct, dequant_mf );
+ dct_orig[0] += 32;
+ dct_orig[1] += 32;
+ dct_orig[2] += 32;
+ dct_orig[3] += 32;
+
+ /* If the DC coefficients already round to zero, terminate early. */
+ if( !((dct_orig[0]|dct_orig[1]|dct_orig[2]|dct_orig[3])>>6) )
+ return 0;
+
+ /* Start with the highest frequency coefficient... is this the best option? */
+ for( nz = 0, coeff = 3; coeff >= 0; coeff-- )
+ {
+ int level = dct[coeff];
+ int sign = level>>31 | 1; /* dct2x2[coeff] < 0 ? -1 : 1 */
+
+ while( level )
+ {
+ dct[coeff] = level - sign;
+ if( idct_dequant_round_2x2_dc( dct_orig, dct, dequant_mf ) )
+ {
+ nz = 1;
+ dct[coeff] = level;
+ break;
+ }
+ level -= sign;
+ }
+ }
+
+ return nz;
+}
+
static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
{
for( int i = 0; i < size; i++ )
@@ -272,6 +332,8 @@
pf->dequant_4x4_dc = dequant_4x4_dc;
pf->dequant_8x8 = dequant_8x8;
+ pf->optimize_chroma_dc = optimize_chroma_dc;
+
pf->denoise_dct = x264_denoise_dct;
pf->decimate_score15 = x264_decimate_score15;
pf->decimate_score16 = x264_decimate_score16;
@@ -427,6 +489,7 @@
pf->dequant_4x4 = x264_dequant_4x4_flat16_sse2;
pf->dequant_8x8 = x264_dequant_8x8_flat16_sse2;
}
+ pf->optimize_chroma_dc = x264_optimize_chroma_dc_sse2;
pf->denoise_dct = x264_denoise_dct_sse2;
pf->decimate_score15 = x264_decimate_score15_sse2;
pf->decimate_score16 = x264_decimate_score16_sse2;
@@ -457,6 +520,7 @@
pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3;
pf->quant_4x4 = x264_quant_4x4_ssse3;
pf->quant_8x8 = x264_quant_8x8_ssse3;
+ pf->optimize_chroma_dc = x264_optimize_chroma_dc_ssse3;
pf->denoise_dct = x264_denoise_dct_ssse3;
pf->decimate_score15 = x264_decimate_score15_ssse3;
pf->decimate_score16 = x264_decimate_score16_ssse3;
@@ -473,6 +537,7 @@
pf->quant_4x4_dc = x264_quant_4x4_dc_sse4;
pf->quant_4x4 = x264_quant_4x4_sse4;
pf->quant_8x8 = x264_quant_8x8_sse4;
+ pf->optimize_chroma_dc = x264_optimize_chroma_dc_sse4;
}
if( cpu&X264_CPU_AVX )
@@ -480,6 +545,7 @@
pf->dequant_4x4 = x264_dequant_4x4_avx;
pf->dequant_8x8 = x264_dequant_8x8_avx;
pf->dequant_4x4_dc = x264_dequant_4x4dc_avx;
+ pf->optimize_chroma_dc = x264_optimize_chroma_dc_avx;
pf->denoise_dct = x264_denoise_dct_avx;
}
#endif // HAVE_MMX
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/quant.h
^
|
@@ -38,6 +38,8 @@
void (*dequant_4x4)( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
void (*dequant_4x4_dc)( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+ int (*optimize_chroma_dc)( dctcoef dct[4], int dequant_mf );
+
void (*denoise_dct)( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
int (*decimate_score15)( dctcoef *dct );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/rectangle.h
^
|
@@ -80,6 +80,15 @@
{
/* height 1, width 16 doesn't occur */
assert( h != 1 );
+#if HAVE_VECTOREXT && defined(__SSE__)
+ v4si v16 = {v,v,v,v};
+
+ M128( d+s*0+0 ) = (__m128)v16;
+ M128( d+s*1+0 ) = (__m128)v16;
+ if( h == 2 ) return;
+ M128( d+s*2+0 ) = (__m128)v16;
+ M128( d+s*3+0 ) = (__m128)v16;
+#else
if( WORD_SIZE == 8 )
{
do
@@ -103,6 +112,7 @@
d += s;
} while( --h );
}
+#endif
}
else
assert(0);
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/set.c
^
|
@@ -24,7 +24,6 @@
*****************************************************************************/
#define _ISOC99_SOURCE
-#include <math.h>
#include "common.h"
#define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
@@ -196,7 +195,7 @@
int dct8x8 = cat == 1;
int size = dct8x8 ? 64 : 16;
udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
- /* Denoise chroma first (due to h264's chroma QP offset, then luma, then DC. */
+ /* Denoise chroma first (due to h264's chroma QP offset), then luma, then DC. */
int dc_threshold = (QP_MAX-QP_MAX_SPEC)*2/3;
int luma_threshold = (QP_MAX-QP_MAX_SPEC)*2/3;
int chroma_threshold = 0;
@@ -237,6 +236,10 @@
h->param.rc.i_qp_max = min_qp_err-1;
if( max_qp_err >= h->param.rc.i_qp_min )
h->param.rc.i_qp_min = max_qp_err+1;
+ /* If long level-codes aren't allowed, we need to allow QP high enough to avoid them. */
+ if( !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH )
+ while( h->chroma_qp_table[SPEC_QP(h->param.rc.i_qp_max)] <= 12 || h->param.rc.i_qp_max <= 12 )
+ h->param.rc.i_qp_max++;
if( h->param.rc.i_qp_min > h->param.rc.i_qp_max )
{
x264_log( h, X264_LOG_ERROR, "Impossible QP constraints for CQM (min=%d, max=%d)\n", h->param.rc.i_qp_min, h->param.rc.i_qp_max );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/vlc.c
^
|
@@ -695,7 +695,7 @@
vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
-void x264_init_vlc_tables( void )
+void x264_cavlc_init( void )
{
for( int i_suffix = 0; i_suffix < 7; i_suffix++ )
for( int16_t level = -LEVEL_TABLE_SIZE/2; level < LEVEL_TABLE_SIZE/2; level++ )
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/win32thread.c
^
|
@@ -59,7 +59,7 @@
static x264_win32thread_control_t thread_control;
/* _beginthreadex requires that the start routine is __stdcall */
-static __stdcall unsigned x264_win32thread_worker( void *arg )
+static unsigned __stdcall x264_win32thread_worker( void *arg )
{
x264_pthread_t *h = arg;
h->ret = h->func( h->arg );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/cabac-a.asm
^
|
@@ -35,13 +35,13 @@
; t3 must be ecx, since it's used for shift.
%ifdef WIN64
- DECLARE_REG_TMP 3,1,2,0,4,5,6,10,2
+ DECLARE_REG_TMP 3,1,2,0,4,5,6,2
%define pointer resq
%elifdef ARCH_X86_64
- DECLARE_REG_TMP 0,1,2,3,4,5,6,10,6
+ DECLARE_REG_TMP 0,1,2,3,4,5,6,6
%define pointer resq
%else
- DECLARE_REG_TMP 0,4,2,1,3,5,6,2,2
+ DECLARE_REG_TMP 0,4,2,1,3,5,6,2
%define pointer resd
%endif
@@ -75,21 +75,21 @@
movifnidn t0, r0mp
movifnidn t1d, r1m
mov t5d, [t0+cb.range]
- movzx t4d, byte [t0+cb.state+t1]
+ movzx t6d, byte [t0+cb.state+t1]
+ mov t4d, ~1
mov t3d, t5d
- mov t6d, t4d
+ and t4d, t6d
shr t5d, 6
- shr t4d, 1
movifnidn t2d, r2m
- LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*4
+ LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2
LOAD_GLOBAL t4d, cabac_transition, t2, t6*2
and t6d, 1
sub t3d, t5d
cmp t6d, t2d
mov t6d, [t0+cb.low]
- lea t7, [t6+t3]
+ lea t2, [t6+t3]
cmovne t3d, t5d
- cmovne t6d, t7d
+ cmovne t6d, t2d
mov [t0+cb.state+t1], t4b
;cabac_encode_renorm
mov t4d, t3d
@@ -108,10 +108,9 @@
cglobal cabac_encode_bypass_asm, 0,3
movifnidn t0, r0mp
movifnidn t3d, r1m
- neg t3d
- mov t8d, [t0+cb.low]
+ mov t7d, [t0+cb.low]
and t3d, [t0+cb.range]
- lea t8d, [t8*2+t3]
+ lea t7d, [t7*2+t3]
mov t3d, [t0+cb.queue]
inc t3d
%ifdef UNIX64 ; .putbyte compiles to nothing but a jmp
@@ -119,12 +118,12 @@
%else
jge .putbyte
%endif
- mov [t0+cb.low], t8d
+ mov [t0+cb.low], t7d
mov [t0+cb.queue], t3d
RET
.putbyte:
PROLOGUE 0,7
- movifnidn t6d, t8d
+ movifnidn t6d, t7d
jmp cabac_putbyte
cglobal cabac_encode_terminal_asm, 0,3
@@ -163,7 +162,7 @@
mov t5d, [t0+cb.bytes_outstanding]
cmp t2b, 0xff ; FIXME is a 32bit op faster?
jz .postpone
- mov t1, [t0+cb.p]
+ mov t1, [t0+cb.p]
add [t1-1], dh ; t2h
dec dh
.loop_outstanding:
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/const-a.asm
^
|
@@ -51,6 +51,7 @@
const pd_1, times 4 dd 1
const pd_32, times 4 dd 32
+const pd_1024, times 4 dd 1024
const pd_ffff, times 4 dd 0xffff
const pw_00ff, times 8 dw 0x00ff
const pw_ff00, times 8 dw 0xff00
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/deblock-a.asm
^
|
@@ -1963,7 +1963,7 @@
%define ref r1+scan8start
%define mv r2+scan8start*4
%define bs0 r3
-%define bs1 r3+16
+%define bs1 r3+32
%macro LOAD_BYTES_MMX 1
movd m2, [%1+8*0-1]
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/mc-a2.asm
^
|
@@ -40,7 +40,7 @@
pd_16: times 4 dd 16
pd_0f: times 4 dd 0xffff
-pf_inv256: times 4 dd 0.00390625
+pf_inv256: times 8 dd 0.00390625
pad10: times 8 dw 10*PIXEL_MAX
pad20: times 8 dw 20*PIXEL_MAX
@@ -1128,7 +1128,7 @@
%endif
; These functions are not general-use; not only do the SSE ones require aligned input,
-; but they also will fail if given a non-mod16 size or a size less than 64.
+; but they also will fail if given a non-mod16 size.
; memzero SSE will fail for non-mod128.
;-----------------------------------------------------------------------------
@@ -1136,12 +1136,15 @@
;-----------------------------------------------------------------------------
cglobal memcpy_aligned_mmx, 3,3
test r2d, 16
- jz .copy32
+ jz .copy32start
sub r2d, 16
movq mm0, [r1 + r2 + 0]
movq mm1, [r1 + r2 + 8]
movq [r0 + r2 + 0], mm0
movq [r0 + r2 + 8], mm1
+.copy32start
+ test r2d, r2d
+ jz .ret
.copy32:
sub r2d, 32
movq mm0, [r1 + r2 + 0]
@@ -1153,6 +1156,7 @@
movq [r0 + r2 + 16], mm2
movq [r0 + r2 + 24], mm3
jg .copy32
+.ret
REP_RET
;-----------------------------------------------------------------------------
@@ -1166,12 +1170,15 @@
movdqa [r0 + r2], xmm0
.copy32:
test r2d, 32
- jz .copy64
+ jz .copy64start
sub r2d, 32
movdqa xmm0, [r1 + r2 + 0]
movdqa [r0 + r2 + 0], xmm0
movdqa xmm1, [r1 + r2 + 16]
movdqa [r0 + r2 + 16], xmm1
+.copy64start
+ test r2d, r2d
+ jz .ret
.copy64:
sub r2d, 64
movdqa xmm0, [r1 + r2 + 0]
@@ -1183,6 +1190,7 @@
movdqa xmm3, [r1 + r2 + 48]
movdqa [r0 + r2 + 48], xmm3
jg .copy64
+.ret:
REP_RET
;-----------------------------------------------------------------------------
@@ -1622,7 +1630,7 @@
; uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
;-----------------------------------------------------------------------------
cglobal mbtree_propagate_cost_sse2, 7,7,7
- shl r6d, 1
+ add r6d, r6d
lea r0, [r0+r6*2]
add r1, r6
add r2, r6
@@ -1665,3 +1673,49 @@
jl .loop
REP_RET
+%macro INT16_TO_FLOAT 1
+ vpunpckhwd xmm4, xmm%1, xmm7
+ vpunpcklwd xmm%1, xmm7
+ vinsertf128 ymm%1, ymm%1, xmm4, 1
+ vcvtdq2ps ymm%1, ymm%1
+%endmacro
+
+; FIXME: align loads/stores to 16 bytes
+cglobal mbtree_propagate_cost_avx, 7,7,8
+ add r6d, r6d
+ lea r0, [r0+r6*2]
+ add r1, r6
+ add r2, r6
+ add r3, r6
+ add r4, r6
+ neg r6
+ vmovdqa xmm5, [pw_3fff]
+ vbroadcastss ymm6, [r5]
+ vmulps ymm6, ymm6, [pf_inv256]
+ vpxor xmm7, xmm7
+.loop:
+ vmovdqu xmm0, [r2+r6] ; intra
+ vmovdqu xmm1, [r4+r6] ; invq
+ vmovdqu xmm2, [r1+r6] ; prop
+ vpand xmm3, xmm5, [r3+r6] ; inter
+ INT16_TO_FLOAT 0
+ INT16_TO_FLOAT 1
+ INT16_TO_FLOAT 2
+ INT16_TO_FLOAT 3
+ vmulps ymm1, ymm1, ymm0
+ vsubps ymm4, ymm0, ymm3
+ vmulps ymm1, ymm1, ymm6 ; intra*invq*fps_factor>>8
+ vaddps ymm1, ymm1, ymm2 ; prop + (intra*invq*fps_factor>>8)
+ vrcpps ymm3, ymm0 ; 1 / intra 1st approximation
+ vmulps ymm2, ymm0, ymm3 ; intra * (1/intra 1st approx)
+ vmulps ymm2, ymm2, ymm3 ; intra * (1/intra 1st approx)^2
+ vmulps ymm1, ymm1, ymm4 ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
+ vaddps ymm3, ymm3, ymm3 ; 2 * (1/intra 1st approx)
+ vsubps ymm3, ymm3, ymm2 ; 2nd approximation for 1/intra
+ vmulps ymm1, ymm1, ymm3 ; / intra
+ vcvtps2dq ymm1, ymm1
+ vmovdqu [r0+r6*2], ymm1
+ add r6, 16
+ jl .loop
+ vzeroupper
+ RET
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/mc-c.c
^
|
@@ -140,6 +140,8 @@
void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride );
void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+void x264_mbtree_propagate_cost_avx( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+ uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
#define MC_CHROMA(cpu)\
void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, int i_dst,\
@@ -728,4 +730,8 @@
if( !(cpu&X264_CPU_STACK_MOD4) )
pf->mc_chroma = x264_mc_chroma_avx;
#endif // HIGH_BIT_DEPTH
+
+ if( !(cpu&X264_CPU_AVX) )
+ return;
+ pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/pixel.h
^
|
@@ -138,6 +138,8 @@
int x264_pixel_var2_8x8_mmxext( pixel *, int, pixel *, int, int * );
int x264_pixel_var2_8x8_sse2( pixel *, int, pixel *, int, int * );
int x264_pixel_var2_8x8_ssse3( uint8_t *, int, uint8_t *, int, int * );
+int x264_pixel_vsad_mmxext( pixel *src, int stride, int height );
+int x264_pixel_vsad_sse2( pixel *src, int stride, int height );
#define DECL_ADS( size, suffix ) \
int x264_pixel_ads##size##_##suffix( int enc_dc[size], uint16_t *sums, int delta,\
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/predict-c.c
^
|
@@ -180,7 +180,7 @@
PREDICT_16x16_P( avx )
#endif //!HIGH_BIT_DEPTH
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
static void x264_predict_16x16_p_sse2( uint16_t *src )
#else
@@ -191,10 +191,10 @@
int H, V;
#if HIGH_BIT_DEPTH
asm (
- "movdqu -2+%1, %%xmm1 \n"
- "movdqa 16+%1, %%xmm0 \n"
- "pmaddwd %2, %%xmm0 \n"
- "pmaddwd %3, %%xmm1 \n"
+ "movdqu %1, %%xmm1 \n"
+ "movdqa %2, %%xmm0 \n"
+ "pmaddwd %3, %%xmm0 \n"
+ "pmaddwd %4, %%xmm1 \n"
"paddd %%xmm1, %%xmm0 \n"
"movhlps %%xmm0, %%xmm1 \n"
"paddd %%xmm1, %%xmm0 \n"
@@ -202,24 +202,26 @@
"paddd %%xmm1, %%xmm0 \n"
"movd %%xmm0, %0 \n"
:"=r"(H)
- :"m"(src[-FDEC_STRIDE]), "m"(*pw_12345678), "m"(*pw_m87654321)
+ :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),
+ "m"(*pw_12345678), "m"(*pw_m87654321)
);
#else
asm (
"movq %1, %%mm1 \n"
- "movq 8+%1, %%mm0 \n"
- "palignr $7, -8+%1, %%mm1 \n"
- "pmaddubsw %2, %%mm0 \n"
- "pmaddubsw %3, %%mm1 \n"
+ "movq %2, %%mm0 \n"
+ "palignr $7, %3, %%mm1 \n"
+ "pmaddubsw %4, %%mm0 \n"
+ "pmaddubsw %5, %%mm1 \n"
"paddw %%mm1, %%mm0 \n"
"pshufw $14, %%mm0, %%mm1 \n"
"paddw %%mm1, %%mm0 \n"
"pshufw $1, %%mm0, %%mm1 \n"
"paddw %%mm1, %%mm0 \n"
"movd %%mm0, %0 \n"
- "movsx %w0, %0 \n"
+ "movswl %w0, %0 \n"
:"=r"(H)
- :"m"(src[-FDEC_STRIDE]), "m"(*pb_12345678), "m"(*pb_m87654321)
+ :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),
+ "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)
);
#endif
V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )
@@ -269,7 +271,7 @@
#endif //!HIGH_BIT_DEPTH
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
static void x264_predict_8x8c_p_sse2( uint16_t *src )
#else
@@ -299,7 +301,7 @@
"pshufw $1, %%mm0, %%mm1 \n"
"paddw %%mm1, %%mm0 \n"
"movd %%mm0, %0 \n"
- "movsx %w0, %0 \n"
+ "movswl %w0, %0 \n"
:"=r"(H)
:"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)
);
@@ -430,7 +432,9 @@
pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
pf[I_PRED_16x16_V] = x264_predict_16x16_v_sse2;
pf[I_PRED_16x16_H] = x264_predict_16x16_h_sse2;
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2;
+#endif
#else
#if !ARCH_X86_64
pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmxext;
@@ -447,7 +451,7 @@
if( !(cpu&X264_CPU_SSSE3) )
return;
pf[I_PRED_16x16_H] = x264_predict_16x16_h_ssse3;
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
#endif
if( !(cpu&X264_CPU_AVX) )
@@ -471,7 +475,9 @@
pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_sse2;
pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2;
pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_sse2;
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2;
+#endif
#else
#if ARCH_X86_64
pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
@@ -491,7 +497,7 @@
if( !(cpu&X264_CPU_SSSE3) )
return;
pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_ssse3;
-#ifdef __GNUC__
+#if HAVE_X86_INLINE_ASM
pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
#endif
#endif // HIGH_BIT_DEPTH
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/quant-a.asm
^
|
@@ -7,6 +7,7 @@
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;* Christian Heine <sennindemokrit@gmx.net>
;* Oskar Arvidsson <oskar@irock.se>
+;* Henrik Gramner <hengar-6@student.ltu.se>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
@@ -69,12 +70,18 @@
db 9,13,9,12,12,16,9,13,12,16,13,16,16,20,7,10,9,13,10,13,13,17,9,13,12,16
db 13,16,16,20,10,13,13,17,13,16,16,20,13,17,16,20,17,20,20,24
+chroma_dc_dct_mask_mmx: dw 0, 0,-1,-1, 0, 0,-1,-1
+chroma_dc_dmf_mask_mmx: dw 0, 0,-1,-1, 0,-1,-1, 0
+chroma_dc_dct_mask: dw 1, 1,-1,-1, 1, 1,-1,-1
+chroma_dc_dmf_mask: dw 1, 1,-1,-1, 1,-1,-1, 1
+
SECTION .text
cextern pb_1
cextern pw_1
cextern pd_1
cextern pb_01
+cextern pd_1024
%macro QUANT_DC_START_MMX 0
movd m6, r1m ; mf
@@ -117,12 +124,18 @@
psignw %1, %2
%endmacro
-%macro PSIGND_MMX 2
+%macro PSIGND_MMX 2-3
+%if %0==3
+ mova %1, %2
+ pxor %1, %3
+ psubd %1, %3
+%else
pxor %1, %2
psubd %1, %2
+%endif
%endmacro
-%macro PSIGND_SSSE3 2
+%macro PSIGND_SSSE3 2+
psignd %1, %2
%endmacro
@@ -747,6 +760,126 @@
DEQUANT_DC avx , w
%endif
+; t4 is eax for return value.
+%ifdef ARCH_X86_64
+ DECLARE_REG_TMP 0,1,2,3,6,4 ; Identical for both Windows and *NIX
+%else
+ DECLARE_REG_TMP 4,1,2,3,0,5
+%endif
+
+;-----------------------------------------------------------------------------
+; x264_optimize_chroma_dc( dctcoef dct[4], int dequant_mf )
+;-----------------------------------------------------------------------------
+
+; %2 == 1 for sse2 or ssse3, 0 for sse4/avx
+%macro OPTIMIZE_CHROMA_DC 2
+%assign %%regs 4+%2
+%ifndef ARCH_X86_64
+ %assign %%regs %%regs+1 ; t0-t4 are volatile on x86-64
+%endif
+cglobal optimize_chroma_dc_%1, 0,%%regs,7
+ movifnidn t0, r0mp
+ movd m2, r1m
+ movq m1, [t0]
+%if %2
+ pxor m4, m4
+%else ; sse4, avx
+ pcmpeqb m4, m4
+ pslld m4, 11
+%endif
+%ifidn %1, sse2
+ mova m3, [chroma_dc_dct_mask_mmx]
+ mova m5, [chroma_dc_dmf_mask_mmx]
+%else
+ mova m3, [chroma_dc_dct_mask]
+ mova m5, [chroma_dc_dmf_mask]
+%endif
+ pshuflw m2, m2, 0
+ pshufd m0, m1, 00010001b ; 1 0 3 2 1 0 3 2
+ punpcklqdq m2, m2
+ punpcklqdq m1, m1 ; 3 2 1 0 3 2 1 0
+ mova m6, [pd_1024] ; 32<<5, elements are shifted 5 bits to the left
+ PSIGNW m0, m3 ; -1 -0 3 2 -1 -0 3 2
+ PSIGNW m2, m5 ; + - - + - - + +
+ paddw m0, m1 ; -1+3 -0+2 1+3 0+2 -1+3 -0+2 1+3 0+2
+ pmaddwd m0, m2 ; 0-1-2+3 0-1+2-3 0+1-2-3 0+1+2+3 * dmf
+ punpcklwd m1, m1
+ psrad m2, 16 ; + - - +
+ mov t1d, 3
+ paddd m0, m6
+ xor t4d, t4d
+%ifidn %1, sse2
+ psrad m1, 31 ; has to be 0 or -1 in order for PSIGND_MMX to work correctly
+%endif
+%if %2
+ mova m6, m0
+ SWAP 0, 6
+ psrad m6, 11
+ pcmpeqd m6, m4
+ pmovmskb t5d, m6
+ cmp t5d, 0xffff
+%else ; sse4, avx
+ ptest m0, m4
+%endif
+ jz .ret ; if the DC coefficients already round to zero, terminate early
+ mova m3, m0
+.outer_loop:
+ movsx t3d, word [t0+2*t1] ; dct[coeff]
+ pshufd m6, m1, 11111111b
+ pshufd m1, m1, 10010000b ; move the next element to high dword
+ PSIGND m5, m2, m6
+ test t3d, t3d
+ jz .loop_end
+.outer_loop_0:
+ mov t2d, t3d
+ sar t3d, 31
+ or t3d, 1
+.inner_loop:
+ psubd m3, m5 ; coeff -= sign
+ pxor m6, m0, m3
+%if %2
+ psrad m6, 11
+ pcmpeqd m6, m4
+ pmovmskb t5d, m6
+ cmp t5d, 0xffff
+%else ; sse4, avx
+ ptest m6, m4
+%endif
+ jz .round_coeff
+ paddd m3, m5 ; coeff += sign
+ mov t4d, 1
+.loop_end:
+ dec t1d
+ jz .last_coeff
+ pshufd m2, m2, 01111000b ; - + - + / - - + +
+ jg .outer_loop
+.ret:
+ REP_RET
+.round_coeff:
+ sub t2d, t3d
+ mov [t0+2*t1], t2w
+ jnz .inner_loop
+ jmp .loop_end
+.last_coeff:
+ movsx t3d, word [t0]
+ punpcklqdq m2, m2 ; + + + +
+ PSIGND m5, m2, m1
+ test t3d, t3d
+ jnz .outer_loop_0
+ REP_RET
+%endmacro
+
+INIT_XMM
+%define PSIGNW PSIGNW_MMX
+%define PSIGND PSIGND_MMX
+OPTIMIZE_CHROMA_DC sse2, 1
+%define PSIGNW PSIGNW_SSSE3
+%define PSIGND PSIGND_SSSE3
+OPTIMIZE_CHROMA_DC ssse3, 1
+OPTIMIZE_CHROMA_DC sse4, 0
+INIT_AVX
+OPTIMIZE_CHROMA_DC avx, 0
+
%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/quant.h
^
|
@@ -57,6 +57,10 @@
void x264_dequant_8x8_flat16_mmx( int16_t dct[64], int dequant_mf[6][64], int i_qp );
void x264_dequant_4x4_flat16_sse2( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void x264_dequant_8x8_flat16_sse2( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+int x264_optimize_chroma_dc_sse2( dctcoef dct[4], int dequant_mf );
+int x264_optimize_chroma_dc_ssse3( dctcoef dct[4], int dequant_mf );
+int x264_optimize_chroma_dc_sse4( dctcoef dct[4], int dequant_mf );
+int x264_optimize_chroma_dc_avx( dctcoef dct[4], int dequant_mf );
void x264_denoise_dct_mmx ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
void x264_denoise_dct_sse2 ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
void x264_denoise_dct_ssse3( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/sad-a.asm
^
|
@@ -273,6 +273,71 @@
RET
;-----------------------------------------------------------------------------
+; void pixel_vsad( pixel *src, int stride );
+;-----------------------------------------------------------------------------
+
+%ifndef ARCH_X86_64
+INIT_MMX
+cglobal pixel_vsad_mmxext, 3,3
+ mova m0, [r0]
+ mova m1, [r0+8]
+ mova m2, [r0+r1]
+ mova m3, [r0+r1+8]
+ lea r0, [r0+r1*2]
+ psadbw m0, m2
+ psadbw m1, m3
+ paddw m0, m1
+ sub r2d, 2
+ je .end
+.loop:
+ mova m4, [r0]
+ mova m5, [r0+8]
+ mova m6, [r0+r1]
+ mova m7, [r0+r1+8]
+ lea r0, [r0+r1*2]
+ psadbw m2, m4
+ psadbw m3, m5
+ psadbw m4, m6
+ psadbw m5, m7
+ paddw m0, m2
+ paddw m0, m3
+ paddw m0, m4
+ paddw m0, m5
+ mova m2, m6
+ mova m3, m7
+ sub r2d, 2
+ jg .loop
+.end:
+ movd eax, m0
+ RET
+%endif
+
+INIT_XMM
+cglobal pixel_vsad_sse2, 3,3
+ mova m0, [r0]
+ mova m1, [r0+r1]
+ lea r0, [r0+r1*2]
+ psadbw m0, m1
+ sub r2d, 2
+ je .end
+.loop:
+ mova m2, [r0]
+ mova m3, [r0+r1]
+ lea r0, [r0+r1*2]
+ psadbw m1, m2
+ psadbw m2, m3
+ paddw m0, m1
+ paddw m0, m2
+ mova m1, m3
+ sub r2d, 2
+ jg .loop
+.end:
+ movhlps m1, m0
+ paddw m0, m1
+ movd eax, m0
+ RET
+
+;-----------------------------------------------------------------------------
; void intra_sad_x3_4x4( uint8_t *fenc, uint8_t *fdec, int res[3] );
;-----------------------------------------------------------------------------
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/common/x86/util.h
^
|
@@ -27,11 +27,19 @@
#ifndef X264_X86_UTIL_H
#define X264_X86_UTIL_H
-#ifdef __GNUC__
-
#ifdef __SSE__
#include <xmmintrin.h>
+
+#undef M128_ZERO
+#define M128_ZERO ((__m128){0,0,0,0})
+#define x264_union128_t x264_union128_sse_t
+typedef union { __m128 i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_sse_t;
+#if HAVE_VECTOREXT
+typedef uint32_t v4si __attribute__((vector_size (16)));
#endif
+#endif // __SSE__
+
+#if HAVE_X86_INLINE_ASM && HAVE_MMX
#define x264_median_mv x264_median_mv_mmxext
static ALWAYS_INLINE void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b, int16_t *c )
@@ -92,11 +100,13 @@
{
static const uint64_t pb_2 = 0x0202020202020202ULL;
static const uint64_t pb_32 = 0x2020202020202020ULL;
+ static const uint64_t pb_33 = 0x2121212121212121ULL;
int amvd;
asm(
"movd %1, %%mm0 \n"
"movd %2, %%mm1 \n"
- "paddb %%mm1, %%mm0 \n"
+ "paddusb %%mm1, %%mm0 \n"
+ "pminub %5, %%mm0 \n"
"pxor %%mm2, %%mm2 \n"
"movq %%mm0, %%mm1 \n"
"pcmpgtb %3, %%mm0 \n"
@@ -106,7 +116,7 @@
"movd %%mm2, %0 \n"
:"=r"(amvd)
:"m"(M16( mvdleft )),"m"(M16( mvdtop )),
- "m"(pb_2),"m"(pb_32)
+ "m"(pb_2),"m"(pb_32),"m"(pb_33)
);
return amvd;
}
@@ -149,13 +159,6 @@
);
}
-#ifdef __SSE__
-#undef M128_ZERO
-#define M128_ZERO ((__m128){0,0,0,0})
-#define x264_union128_t x264_union128_sse_t
-typedef union { __m128 i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_sse_t;
-#endif
-
#endif
#endif
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/configure
^
|
@@ -7,6 +7,10 @@
available options:
--help print this message
+ --disable-cli disables cli
+ --system-libx264 use system libx264 instead of internal
+ --enable-shared build shared library
+ --enable-static build static library
--disable-avs disables avisynth support (windows only)
--disable-lavf disables libavformat support
--disable-ffms disables ffmpegsource support
@@ -16,11 +20,12 @@
--enable-win32thread use win32threads (windows only)
--disable-swscale disables swscale support
--disable-asm disables platform-specific assembly optimizations
- --enable-debug adds -g, doesn't strip
- --enable-gprof adds -pg, doesn't strip
+ --disable-interlaced disables interlaced encoding support
+ --enable-debug adds -g
+ --enable-gprof adds -pg
+ --enable-strip adds -s
--enable-visualize enables visualization (X11 only)
--enable-pic build position-independent code
- --enable-shared build shared library
--bit-depth=BIT_DEPTH sets output bit depth (8-10), default 8
--extra-asflags=EASFLAGS add EASFLAGS to ASFLAGS
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS
@@ -49,6 +54,45 @@
echo "$1" >> config.log
}
+intel_cflags() {
+ # Intel Compiler issues an incredibly large number of warnings on any warning level,
+ # suppress them by disabling all warnings rather than having to use #pragmas to disable most of them
+ for arg in $*; do
+ [ $arg = -ffast-math ] && arg=
+ [[ "$arg" = -falign-loops* ]] && arg=
+ [ "$arg" = -fno-tree-vectorize ] && arg=
+ [ "$arg" = -Wshadow ] && arg=
+ if [ $compiler = ICL ]; then
+ [ "$arg" = -Wall ] && arg=-W0
+ [ "$arg" = -g ] && arg=-Z7
+ [ "$arg" = -fomit-frame-pointer ] && arg=
+ [ "$arg" = -s ] && arg=
+ [ "$arg" = -fPIC ] && arg=
+ else
+ [ "$arg" = -Wall ] && arg=-w0
+ fi
+
+ [ -n "$arg" ] && echo -n "$arg "
+ done
+}
+
+icl_ldflags() {
+ for arg in $*; do
+ arg=${arg/LIBPATH/libpath}
+ [ ${arg#-libpath:} == $arg -a ${arg#-l} != $arg ] && arg=${arg#-l}.lib
+ [ ${arg#-L} != $arg ] && arg=-libpath:${arg#-L}
+ [ $arg = -Wl,--large-address-aware ] && arg=-largeaddressaware
+ [ $arg = -s ] && arg=
+ [ "$arg" = -Wl,-Bsymbolic ] && arg=
+
+ arg=${arg/pthreadGC/pthreadVC}
+ [ "$arg" = avifil32.lib ] && arg=vfw32.lib
+ [ "$arg" = gpac_static.lib ] && arg=libgpac_static.lib
+
+ [ -n "$arg" ] && echo -n "$arg "
+ done
+}
+
cc_check() {
if [ -z "$3" ]; then
if [ -z "$1$2" ]; then
@@ -59,14 +103,23 @@
log_check "for $1"
fi
elif [ -z "$1" ]; then
- log_check "whether $CC supports $3"
+ if [ -z "$2" ]; then
+ log_check "whether $CC supports $3"
+ else
+ log_check "whether $CC supports $3 with $2"
+ fi
else
log_check "for $3 in $1";
fi
rm -f conftest.c
[ -n "$1" ] && echo "#include <$1>" > conftest.c
echo "int main () { $3 return 0; }" >> conftest.c
- if $CC conftest.c $CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest >conftest.log 2>&1; then
+ if [ $compiler = ICL ]; then
+ cc_cmd="$CC conftest.c $CFLAGS $2 -link $(icl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
+ else
+ cc_cmd="$CC conftest.c $CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
+ fi
+ if $cc_cmd >conftest.log 2>&1; then
res=$?
log_ok
else
@@ -74,7 +127,7 @@
log_fail
log_msg "Failed commandline was:"
log_msg "--------------------------------------------------"
- log_msg "$CC conftest.c $CFLAGS $2 $LDFLAGSCLI $LDFLAGS"
+ log_msg "$cc_cmd"
cat conftest.log >> config.log
log_msg "--------------------------------------------------"
log_msg "Failed program was:"
@@ -140,7 +193,7 @@
exit 1
}
-rm -f x264_config.h config.h config.mak config.log x264.pc conftest*
+rm -f x264_config.h config.h config.mak config.log x264.pc x264.def conftest*
prefix='/usr/local'
exec_prefix='${prefix}'
@@ -149,6 +202,10 @@
includedir='${prefix}/include'
DEVNULL='/dev/null'
+cli="yes"
+cli_libx264="internal"
+shared="no"
+static="no"
avs="auto"
lavf="auto"
ffms="auto"
@@ -157,12 +214,14 @@
thread="auto"
swscale="auto"
asm="auto"
+interlaced="yes"
debug="no"
gprof="no"
+strip="no"
pic="no"
vis="no"
-shared="no"
bit_depth="8"
+compiler="GNU"
CFLAGS="$CFLAGS -Wall -I."
LDFLAGS="$LDFLAGS"
@@ -174,7 +233,7 @@
EXE=""
# list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED"
# parse options
@@ -196,9 +255,24 @@
--includedir=*)
includedir="$optarg"
;;
+ --disable-cli)
+ cli="no"
+ ;;
+ --system-libx264)
+ cli_libx264="system"
+ ;;
+ --enable-shared)
+ shared="yes"
+ ;;
+ --enable-static)
+ static="yes"
+ ;;
--disable-asm)
asm="no"
;;
+ --disable-interlaced)
+ interlaced="no"
+ ;;
--disable-avs)
avs="no"
;;
@@ -240,12 +314,12 @@
LDFLAGS="$LDFLAGS -pg"
gprof="yes"
;;
+ --enable-strip)
+ strip="yes"
+ ;;
--enable-pic)
pic="yes"
;;
- --enable-shared)
- shared="yes"
- ;;
--enable-visualize)
vis="yes"
;;
@@ -273,6 +347,8 @@
esac
done
+[ "$cli" = "no" -a "$shared" = "no" -a "$static" = "no" ] && die "Nothing to build. Enable cli, shared or static."
+
CC="${CC-${cross_prefix}gcc}"
AR="${AR-${cross_prefix}ar}"
RANLIB="${RANLIB-${cross_prefix}ranlib}"
@@ -290,6 +366,26 @@
host_vendor="${host%%-*}"
host_os="${host#*-}"
+# test for use of Intel Compiler
+if [[ $host_os = mingw* || $host_os = cygwin* ]]; then
+ if [[ `basename "$CC"` = icl* ]]; then
+ # Windows Intel Compiler creates dependency generation with absolute Windows paths, Cygwin's make does not support Windows paths.
+ [[ $host_os = cygwin* ]] && die "Windows Intel Compiler support requires MSYS"
+ compiler=ICL
+ CFLAGS="$CFLAGS -Qstd=c99 -nologo -Qms0 -DHAVE_STRING_H -Iextras"
+ QPRE="-Q"
+ `$CC 2>&1 | grep -q IA-32` && host_cpu=i486
+ `$CC 2>&1 | grep -q "Intel(R) 64"` && host_cpu=x86_64
+ cpp_check "" "" "_MSC_VER >= 1400" || die "Windows Intel Compiler support requires Visual Studio 2005 or newer"
+ fi
+else
+ if [[ `basename "$CC"` = icc* ]]; then
+ AR="xiar"
+ compiler=ICC
+ QPRE="-"
+ fi
+fi
+
case $host_os in
beos*)
SYS="BEOS"
@@ -326,16 +422,21 @@
LDFLAGS="$LDFLAGS -lm"
;;
cygwin*)
- SYS="MINGW"
EXE=".exe"
- DEVNULL="NUL"
if cc_check "" -mno-cygwin; then
CFLAGS="$CFLAGS -mno-cygwin"
LDFLAGS="$LDFLAGS -mno-cygwin"
fi
+ if cpp_check "" "" "defined(__CYGWIN32__)" ; then
+ define HAVE_MALLOC_H
+ SYS="CYGWIN"
+ else
+ SYS="WINDOWS"
+ DEVNULL="NUL"
+ fi
;;
mingw*)
- SYS="MINGW"
+ SYS="WINDOWS"
EXE=".exe"
DEVNULL="NUL"
;;
@@ -355,15 +456,31 @@
ARCH="X86"
AS="yasm"
ASFLAGS="$ASFLAGS -O2"
- if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then
- CFLAGS="$CFLAGS -march=i686"
- fi
- if [[ "$asm" == auto && "$CFLAGS" != *-mfpmath* ]]; then
- CFLAGS="$CFLAGS -mfpmath=sse -msse"
+ if [ $compiler = GNU ]; then
+ if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then
+ CFLAGS="$CFLAGS -march=i686"
+ fi
+ if [[ "$asm" == auto && "$CFLAGS" != *-mfpmath* ]]; then
+ CFLAGS="$CFLAGS -mfpmath=sse -msse"
+ fi
+ else
+ # icc on linux has various degrees of mod16 stack support
+ if [ $SYS = LINUX ]; then
+ # < 11 is completely incapable of keeping a mod16 stack
+ if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
+ define BROKEN_STACK_ALIGNMENT
+ # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
+ elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
+ CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
+ fi
+ # >= 12 defaults to a mod16 stack
+ fi
+ # icl on windows has no mod16 stack support
+ [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT
fi
if [ "$SYS" = MACOSX ]; then
ASFLAGS="$ASFLAGS -f macho -DPREFIX"
- elif [ "$SYS" = MINGW ]; then
+ elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
ASFLAGS="$ASFLAGS -f win32 -DPREFIX"
LDFLAGS="$LDFLAGS -Wl,--large-address-aware"
else
@@ -379,9 +496,10 @@
CFLAGS="$CFLAGS -arch x86_64"
LDFLAGS="$LDFLAGS -arch x86_64"
fi
- elif [ "$SYS" = MINGW ]; then
+ elif [ "$SYS" = WINDOWS ]; then
ASFLAGS="$ASFLAGS -f win32 -m amd64"
- cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
+ # only the GNU toolchain is inconsistent in prefixing function names with _
+ [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
else
ASFLAGS="$ASFLAGS -f elf -m amd64"
fi
@@ -460,10 +578,14 @@
cc_check || die "No working C compiler found."
-if cc_check '' -std=gnu99 ; then
- CFLAGS="$CFLAGS -std=gnu99"
-elif cc_check '' -std=c99 ; then
- CFLAGS="$CFLAGS -std=c99 -D_POSIX_C_SOURCE=200112L -D_BSD_SOURCE"
+if [ $compiler != ICL ]; then
+ if cc_check '' -std=gnu99 'for( int i = 0; i < 9; i++ );' ; then
+ CFLAGS="$CFLAGS -std=gnu99"
+ elif cc_check '' -std=c99 'for( int i = 0; i < 9; i++ );' ; then
+ CFLAGS="$CFLAGS -std=c99 -D_POSIX_C_SOURCE=200112L -D_BSD_SOURCE"
+ elif ! cc_check '' '' 'for( int i = 0; i < 9; i++ );' ; then
+ die "C99 compiler is needed for compilation."
+ fi
fi
if [ $shared = yes -a \( $ARCH = "X86_64" -o $ARCH = "PPC" -o $ARCH = "ALPHA" -o $ARCH = "ARM" -o $ARCH = "IA64" \) ] ; then
@@ -472,14 +594,14 @@
if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
if ! as_check "vpaddw xmm0, xmm0, xmm0" ; then
- VER=`($AS --version || echo no assembler) 2>$DEVNULL | head -n 1`
+ VER=`($AS --version || echo no assembler) 2>/dev/null | head -n 1`
echo "Found $VER"
echo "Minimum version is yasm-0.7.0"
echo "If you really want to compile without asm, configure with --disable-asm."
exit 1
fi
if ! cc_check '' '' '__asm__("pabsw %xmm0, %xmm0");' ; then
- VER=`(${cross_prefix}as --version || echo no gnu as) 2>$DEVNULL | head -n 1`
+ VER=`(${cross_prefix}as --version || echo no gnu as) 2>/dev/null | head -n 1`
echo "Found $VER"
echo "Minimum version is binutils-2.17"
echo "Your compiler can't handle inline SSSE3 asm."
@@ -510,18 +632,21 @@
define ARCH_$ARCH
define SYS_$SYS
-echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
-$CC $CFLAGS conftest.c -c -o conftest.o 2>$DEVNULL || die "endian test failed"
-if (${cross_prefix}strings -a conftest.o | grep -q BIGE) && (${cross_prefix}strings -a conftest.o | grep -q FPendian) ; then
- define WORDS_BIGENDIAN
-elif !(${cross_prefix}strings -a conftest.o | grep -q EGIB && ${cross_prefix}strings -a conftest.o | grep -q naidnePF) ; then
- die "endian test failed"
+# skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
+if [ $compiler = GNU ]; then
+ echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
+ $CC $CFLAGS conftest.c -c -o conftest.o 2>/dev/null || die "endian test failed"
+ if (${cross_prefix}strings -a conftest.o | grep -q BIGE) && (${cross_prefix}strings -a conftest.o | grep -q FPendian) ; then
+ define WORDS_BIGENDIAN
+ elif !(${cross_prefix}strings -a conftest.o | grep -q EGIB && ${cross_prefix}strings -a conftest.o | grep -q naidnePF) ; then
+ die "endian test failed"
+ fi
fi
# autodetect options that weren't forced nor disabled
# pthread-win32 is lgpl, prevent its use if --disable-gpl is specified and targeting windows
-[ "$SYS" = "MINGW" -a "$gpl" = "no" -a "$thread" = "auto" ] && thread="win32"
+[ "$SYS" = "WINDOWS" -a "$gpl" = "no" -a "$thread" = "auto" ] && thread="win32"
libpthread=""
if [ "$thread" = "auto" ]; then
@@ -531,7 +656,7 @@
thread="beos"
define HAVE_BEOSTHREAD
;;
- MINGW)
+ WINDOWS)
if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
thread="posix"
libpthread="-lpthread"
@@ -564,7 +689,8 @@
define HAVE_POSIXTHREAD
fi
if [ "$thread" = "win32" ]; then
- if [ "$SYS" = "MINGW" ]; then
+ # cygwin does not support win32 threads
+ if [ "$SYS" = "WINDOWS" ]; then
define HAVE_WIN32THREAD
else
thread="no"
@@ -590,30 +716,24 @@
if [ "$swscale" = "auto" ] ; then
swscale="no"
- if ${cross_prefix}pkg-config --exists libswscale 2>$DEVNULL; then
+ if ${cross_prefix}pkg-config --exists libswscale 2>/dev/null; then
SWSCALE_LIBS="$SWSCALE_LIBS $(${cross_prefix}pkg-config --libs libswscale)"
SWSCALE_CFLAGS="$SWSCALE_CFLAGS $(${cross_prefix}pkg-config --cflags libswscale)"
fi
[ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil"
- error="swscale must be at least version 0.9.0"
- if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_getContext(0,0,0,0,0,0,0,0,0,0);" ; then
- if cpp_check "libswscale/swscale.h" "$SWSCALE_CFLAGS" "LIBSWSCALE_VERSION_INT >= AV_VERSION_INT(0,9,0)" "$error"; then
- # we use colorspaces that were defined in libavutil r19775
- if cc_check "libavutil/pixfmt.h" "$SWSCALE_CFLAGS" "enum PixelFormat pixfmt = PIX_FMT_YUV422P16LE;" ; then
- swscale="yes"
- else
- echo "Warning: libavutil is too old, update to ffmpeg r19775+"
- fi
+ if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then
+ if cc_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "av_get_pix_fmt_name(0);" ; then
+ swscale="yes"
else
- echo "Warning: ${error}"
+ echo "Warning: av_get_pix_fmt_name is missing from libavutil, update for swscale support"
fi
fi
fi
if [ "$lavf" = "auto" ] ; then
lavf="no"
- if ${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL; then
+ if ${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>/dev/null; then
LAVF_LIBS="$LAVF_LIBS $(${cross_prefix}pkg-config --libs libavformat libavcodec libavutil libswscale)"
LAVF_CFLAGS="$LAVF_CFLAGS $(${cross_prefix}pkg-config --cflags libavformat libavcodec libavutil libswscale)"
fi
@@ -625,15 +745,14 @@
fi
LAVF_LIBS="-L. $LAVF_LIBS"
if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "avcodec_decode_video2(0,0,0,0);" ; then
- # libvautil/pixdesc.h included the private header intreadwrite.h until r21854
- if cc_check libavutil/pixdesc.h "$LAVF_CFLAGS $LAVF_LIBS" ; then
+ if cpp_check libavcodec/avcodec.h "$LAVF_CFLAGS $LAVF_LIBS" "LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(52,64,0)" ; then
if [ "$swscale" = "yes" ]; then
lavf="yes"
else
echo "Warning: libavformat is not supported without swscale support"
fi
else
- echo "Warning: libavutil is too old, update to ffmpeg r21854+"
+ echo "Warning: libavcodec is too old, update to ffmpeg r22735+"
fi
fi
fi
@@ -642,7 +761,7 @@
ffms_major="2"; ffms_minor="14"; ffms_micro="0"; ffms_bump="0"
ffms="no"
- if ${cross_prefix}pkg-config --exists ffms2 2>$DEVNULL; then
+ if ${cross_prefix}pkg-config --exists ffms2 2>/dev/null; then
FFMS2_LIBS="$FFMS2_LIBS $(${cross_prefix}pkg-config --libs ffms2)"
FFMS2_CFLAGS="$FFMS2_CFLAGS $(${cross_prefix}pkg-config --cflags ffms2)"
fi
@@ -682,12 +801,12 @@
fi
fi
-GPAC_LIBS="-lgpac_static"
-if [ $SYS = MINGW ]; then
- GPAC_LIBS="$GPAC_LIBS -lwinmm"
-fi
if [ "$gpac" = "auto" ] ; then
gpac="no"
+ cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static"
+ if [ "$SYS" = "WINDOWS" ] ; then
+ GPAC_LIBS="$GPAC_LIBS -lwinmm"
+ fi
if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then
if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
gpac="yes"
@@ -706,12 +825,15 @@
if [ "$avs" = "auto" ] ; then
avs="no"
- if [ $SYS = MINGW ] && cc_check extras/avisynth_c.h ; then
+ # cygwin can use avisynth if it can use LoadLibrary
+ if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
avs="yes"
define HAVE_AVS
fi
fi
+cc_check "stdint.h" "" "uint32_t test_vec __attribute__ ((vector_size (16))) = {0,1,2,3};" && define HAVE_VECTOREXT
+
if [ "$pic" = "yes" ] ; then
CFLAGS="$CFLAGS -fPIC"
ASFLAGS="$ASFLAGS -DPIC"
@@ -720,7 +842,11 @@
fi
if [ "$debug" != "yes" -a "$gprof" != "yes" ]; then
- CFLAGS="$CFLAGS -s -fomit-frame-pointer"
+ CFLAGS="$CFLAGS -fomit-frame-pointer"
+fi
+
+if [ "$strip" = "yes" ]; then
+ CFLAGS="$CFLAGS -s"
LDFLAGS="$LDFLAGS -s"
fi
@@ -738,7 +864,7 @@
CFLAGS="$CFLAGS -fno-tree-vectorize"
fi
-if [ $SYS = MINGW -a $ARCH = X86 ] ; then
+if [ $SYS = WINDOWS -a $ARCH = X86 -a $compiler = GNU ] ; then
# workaround gcc/ld bug with alignment of static variables/arrays that are initialized to zero
cc_check '' -fno-zero-initialized-in-bss && CFLAGS="$CFLAGS -fno-zero-initialized-in-bss"
fi
@@ -749,6 +875,9 @@
elif cc_check "stdio.h" "" "fseeko64(stdin,0,0);" ; then
define fseek fseeko64
define ftell ftello64
+elif cc_check "stdio.h" "" "_fseeki64(stdin,0,0);" ; then
+ define fseek _fseeki64
+ define ftell _ftelli64
fi
if cc_check '' -Wshadow ; then
@@ -764,18 +893,60 @@
[ $gpl = yes ] && define HAVE_GPL && x264_gpl=1 || x264_gpl=0
+[ $interlaced = yes ] && define HAVE_INTERLACED && x264_interlaced=1 || x264_interlaced=0
+
#define undefined vars as 0
for var in $CONFIG_HAVE; do
grep -q "HAVE_$var 1" config.h || define HAVE_$var 0
done
+if [ $compiler = ICL ]; then
+ AR="xilib -nologo -out:"
+ DEPMM=-QMM
+ DEPMT=-QMT
+ HAVE_GETOPT_LONG=0
+ LD="xilink -out:"
+ LDFLAGS="-nologo -incremental:no $(icl_ldflags $LDFLAGS)"
+ LDFLAGSCLI="$(icl_ldflags $LDFLAGSCLI)"
+ LIBX264=libx264.lib
+ RANLIB=
+ STRIP=
+ if [ $debug = yes ]; then
+ LDFLAGS="-debug $LDFLAGS"
+ CFLAGS="-D_DEBUG $CFLAGS"
+ else
+ CFLAGS="-DNDEBUG $CFLAGS"
+ fi
+else
+ AR="$AR rc "
+ DEPMM="-MM -g0"
+ DEPMT="-MT"
+ LD="$CC -o "
+ LIBX264=libx264.a
+fi
+if [ $compiler = GNU ]; then
+ PROF_GEN_CC="-fprofile-generate"
+ PROF_GEN_LD="-fprofile-generate"
+ PROF_USE_CC="-fprofile-use"
+ PROF_USE_LD="-fprofile-use"
+else
+ CFLAGS="$(intel_cflags $CFLAGS)"
+ # icc does not define __SSE__ until SSE2 optimization and icl never defines it or _M_IX86_FP
+ [ \( $ARCH = X86_64 -o $ARCH = X86 \) -a $asm = yes ] && ! cpp_check "" "" "defined(__SSE__)" && define __SSE__
+ PROF_GEN_CC="${QPRE}prof-gen ${QPRE}prof-dir."
+ PROF_GEN_LD=
+ PROF_USE_CC="${QPRE}prof-use ${QPRE}prof-dir."
+ PROF_USE_LD=
+fi
+
rm -f conftest*
# generate exported config file
cat > x264_config.h << EOF
-#define X264_BIT_DEPTH $bit_depth
-#define X264_GPL $x264_gpl
+#define X264_BIT_DEPTH $bit_depth
+#define X264_GPL $x264_gpl
+#define X264_INTERLACED $x264_interlaced
EOF
# generate config files
@@ -790,8 +961,11 @@
SYS=$SYS
CC=$CC
CFLAGS=$CFLAGS
+DEPMM=$DEPMM
+DEPMT=$DEPMT
+LD=$LD
LDFLAGS=$LDFLAGS
-LDFLAGSCLI=$LDFLAGSCLI
+LIBX264=$LIBX264
AR=$AR
RANLIB=$RANLIB
STRIP=$STRIP
@@ -800,30 +974,74 @@
EXE=$EXE
HAVE_GETOPT_LONG=$HAVE_GETOPT_LONG
DEVNULL=$DEVNULL
+PROF_GEN_CC=$PROF_GEN_CC
+PROF_GEN_LD=$PROF_GEN_LD
+PROF_USE_CC=$PROF_USE_CC
+PROF_USE_LD=$PROF_USE_LD
EOF
+if [ $compiler = ICL ]; then
+ echo '%.o: %.c' >> config.mak
+ echo ' $(CC) $(CFLAGS) -c -Fo$@ $<' >> config.mak
+fi
+
+if [ "$cli" = "yes" ]; then
+ echo 'default: cli' >> config.mak
+ echo 'install: install-cli' >> config.mak
+fi
+
if [ "$shared" = "yes" ]; then
API=$(grep '#define X264_BUILD' < x264.h | cut -f 3 -d ' ')
- if [ "$SYS" = "MINGW" ]; then
+ if [ "$SYS" = "WINDOWS" -o "$SYS" = "CYGWIN" ]; then
echo "SONAME=libx264-$API.dll" >> config.mak
- echo 'IMPLIBNAME=libx264.dll.a' >> config.mak
- echo 'SOFLAGS=-Wl,--out-implib,$(IMPLIBNAME) -Wl,--enable-auto-image-base' >> config.mak
+ if [ $compiler = ICL ]; then
+ echo 'IMPLIBNAME=libx264.dll.lib' >> config.mak
+ # GNU ld on windows defaults to exporting all global functions if there are no explicit __declspec(dllexport) declarations
+ # MSVC link does not act similarly, so it is required to make an export definition out of x264.h and use it at link time
+ echo 'SOFLAGS=-dll -def:x264.def -implib:$(IMPLIBNAME)' >> config.mak
+ echo "EXPORTS" > x264.def
+ grep "^\(int\|void\|x264_t\|extern\).*x264.*[\[(;]" x264.h | sed -e "s/.*\(x264.*\)[\[(].*/\1/;s/.*\(x264.*\);/\1/;s/open/open_$API/g" >> x264.def
+ else
+ echo 'IMPLIBNAME=libx264.dll.a' >> config.mak
+ echo 'SOFLAGS=-shared -Wl,--out-implib,$(IMPLIBNAME) -Wl,--enable-auto-image-base' >> config.mak
+ fi
elif [ "$SYS" = "MACOSX" ]; then
echo "SOSUFFIX=dylib" >> config.mak
echo "SONAME=libx264.$API.dylib" >> config.mak
- echo 'SOFLAGS=-dynamiclib -Wl,-single_module -Wl,-read_only_relocs,suppress -install_name $(DESTDIR)$(libdir)/$(SONAME)' >> config.mak
+ echo 'SOFLAGS=-shared -dynamiclib -Wl,-single_module -Wl,-read_only_relocs,suppress -install_name $(DESTDIR)$(libdir)/$(SONAME)' >> config.mak
elif [ "$SYS" = "SunOS" ]; then
echo "SOSUFFIX=so" >> config.mak
echo "SONAME=libx264.so.$API" >> config.mak
- echo 'SOFLAGS=-Wl,-h,$(SONAME)' >> config.mak
+ echo 'SOFLAGS=-shared -Wl,-h,$(SONAME)' >> config.mak
else
echo "SOSUFFIX=so" >> config.mak
echo "SONAME=libx264.so.$API" >> config.mak
- echo 'SOFLAGS=-Wl,-soname,$(SONAME)' >> config.mak
+ echo 'SOFLAGS=-shared -Wl,-soname,$(SONAME)' >> config.mak
fi
- echo 'default: $(SONAME)' >> config.mak
+ echo 'default: lib-shared' >> config.mak
+ echo 'install: install-lib-shared' >> config.mak
fi
+if [ "$static" = "yes" ]; then
+ echo 'default: lib-static' >> config.mak
+ echo 'install: install-lib-static' >> config.mak
+fi
+
+if [ "$cli_libx264" = "system" ] ; then
+ if [ "$shared" = "yes" ]; then
+ CLI_LIBX264='$(SONAME)'
+ elif ${cross_prefix}pkg-config --exists x264 2>/dev/null; then
+ LDFLAGSCLI="$LDFLAGSCLI $(${cross_prefix}pkg-config --libs x264)"
+ CLI_LIBX264=
+ else
+ die "Can not find system libx264"
+ fi
+else
+ CLI_LIBX264='$(LIBX264)'
+fi
+echo "LDFLAGSCLI = $LDFLAGSCLI" >> config.mak
+echo "CLI_LIBX264 = $CLI_LIBX264" >> config.mak
+
./version.sh >> config.h
pclibs="-L$libdir -lx264 $libpthread"
@@ -849,7 +1067,12 @@
cat > conftest.log <<EOF
Platform: $ARCH
System: $SYS
+cli: $cli
+libx264: $cli_libx264
+shared: $shared
+static: $static
asm: $asm
+interlaced: $interlaced
avs: $avs
lavf: $lavf
ffms: $ffms
@@ -859,8 +1082,8 @@
filters: $filters
debug: $debug
gprof: $gprof
+strip: $strip
PIC: $pic
-shared: $shared
visualize: $vis
bit depth: $bit_depth
EOF
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/analyse.c
^
|
@@ -26,8 +26,6 @@
*****************************************************************************/
#define _ISOC99_SOURCE
-#include <math.h>
-#include <unistd.h>
#include "common/common.h"
#include "macroblock.h"
@@ -138,7 +136,8 @@
} x264_mb_analysis_t;
/* lambda = pow(2,qp/6-2) */
-const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
+const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
+{
1, 1, 1, 1, 1, 1, 1, 1, /* 0- 7 */
1, 1, 1, 1, 1, 1, 1, 1, /* 8-15 */
2, 2, 2, 2, 3, 3, 3, 4, /* 16-23 */
@@ -154,7 +153,8 @@
/* lambda2 = pow(lambda,2) * .9 * 256 */
/* Capped to avoid overflow */
-const int x264_lambda2_tab[QP_MAX_MAX+1] = {
+const int x264_lambda2_tab[QP_MAX_MAX+1] =
+{
14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
@@ -168,14 +168,16 @@
134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */
};
-const uint8_t x264_exp2_lut[64] = {
+const uint8_t x264_exp2_lut[64] =
+{
0, 3, 6, 8, 11, 14, 17, 20, 23, 26, 29, 32, 36, 39, 42, 45,
48, 52, 55, 58, 62, 65, 69, 72, 76, 80, 83, 87, 91, 94, 98, 102,
106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
};
-const float x264_log2_lut[128] = {
+const float x264_log2_lut[128] =
+{
0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
@@ -195,13 +197,15 @@
};
/* Avoid an int/float conversion. */
-const float x264_log2_lz_lut[32] = {
+const float x264_log2_lz_lut[32] =
+{
31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
};
// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
-static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
+static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
+{
// inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
{
46, 58, 73, 92, 117, 147,
@@ -238,7 +242,8 @@
};
#define MAX_CHROMA_LAMBDA_OFFSET 36
-static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = {
+static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] =
+{
16, 20, 25, 32, 40, 50,
64, 80, 101, 128, 161, 203,
256, 322, 406, 512, 645, 812,
@@ -249,16 +254,20 @@
};
/* TODO: calculate CABAC costs */
-static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] = {
+static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] =
+{
9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
};
-static const uint8_t i_mb_b16x8_cost_table[17] = {
+static const uint8_t i_mb_b16x8_cost_table[17] =
+{
0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
};
-static const uint8_t i_sub_mb_b_cost_table[13] = {
+static const uint8_t i_sub_mb_b_cost_table[13] =
+{
7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
};
-static const uint8_t i_sub_mb_p_cost_table[4] = {
+static const uint8_t i_sub_mb_p_cost_table[4] =
+{
5, 3, 3, 1
};
@@ -267,7 +276,18 @@
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
-int x264_analyse_init_costs( x264_t *h, int qp )
+float *x264_analyse_prepare_costs( x264_t *h )
+{
+ float *logs = x264_malloc( (2*4*2048+1)*sizeof(float) );
+ if( !logs )
+ return NULL;
+ logs[0] = 0.718f;
+ for( int i = 1; i <= 2*4*2048; i++ )
+ logs[i] = log2f(i+1)*2 + 1.718f;
+ return logs;
+}
+
+int x264_analyse_init_costs( x264_t *h, float *logs, int qp )
{
int lambda = x264_lambda_tab[qp];
if( h->cost_mv[qp] )
@@ -278,7 +298,7 @@
for( int i = 0; i <= 2*4*2048; i++ )
{
h->cost_mv[qp][-i] =
- h->cost_mv[qp][i] = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
+ h->cost_mv[qp][i] = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
}
x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ )
@@ -320,7 +340,7 @@
{
x264_frame_t *frame = h->fref[0][j];
int width = frame->i_width[0] + 2*PADH;
- int i_padv = PADV << h->param.b_interlaced;
+ int i_padv = PADV << PARAM_INTERLACED;
int offset, height;
pixel *src = frame->filtered[0] - frame->i_stride[0]*i_padv - PADH;
height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
@@ -428,7 +448,7 @@
/* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
- h->mb.mv_max[0] = 4*( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
+ h->mb.mv_max[0] = 4*( 16*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
@@ -441,15 +461,14 @@
}
h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
- if( h->mb.i_mb_x == 0 )
+ if( h->mb.i_mb_x == 0 && !(h->mb.i_mb_y & PARAM_INTERLACED) )
{
- int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
- int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
+ int mb_y = h->mb.i_mb_y >> SLICE_MBAFF;
int thread_mvy_range = i_fmv_range;
if( h->i_thread_frames > 1 )
{
- int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
+ int pix_y = (h->mb.i_mb_y | PARAM_INTERLACED) * 16;
int thresh = pix_y + h->param.analyse.i_mv_range_thread;
for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
for( int j = 0; j < h->i_ref[i]; j++ )
@@ -460,19 +479,48 @@
if( h->param.b_deterministic )
thread_mvy_range = h->param.analyse.i_mv_range_thread;
- if( h->mb.b_interlaced )
+ if( PARAM_INTERLACED )
thread_mvy_range >>= 1;
x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
}
- h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
- h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
- h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
- h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
- h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
- h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
- h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
+ if( PARAM_INTERLACED )
+ {
+ /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
+ for( int i = 0; i < 3; i++ )
+ {
+ int j = i == 2;
+ mb_y = (h->mb.i_mb_y >> j) + (i == 1);
+ h->mb.mv_miny_row[i] = 4*( -16*mb_y - 24 );
+ h->mb.mv_maxy_row[i] = 4*( 16*( (h->mb.i_mb_height>>j) - mb_y - 1 ) + 24 );
+ h->mb.mv_miny_spel_row[i] = x264_clip3( h->mb.mv_miny_row[i], -i_fmv_range, i_fmv_range );
+ h->mb.mv_maxy_spel_row[i] = CLIP_FMV( h->mb.mv_maxy_row[i] );
+ h->mb.mv_maxy_spel_row[i] = X264_MIN( h->mb.mv_maxy_spel_row[i], thread_mvy_range*4 );
+ h->mb.mv_miny_fpel_row[i] = (h->mb.mv_miny_spel_row[i]>>2) + i_fpel_border;
+ h->mb.mv_maxy_fpel_row[i] = (h->mb.mv_maxy_spel_row[i]>>2) - i_fpel_border;
+ }
+ }
+ else
+ {
+ h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
+ h->mb.mv_max[1] = 4*( 16*( h->mb.i_mb_height - mb_y - 1 ) + 24 );
+ h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
+ h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
+ h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
+ h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
+ h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
+ }
+ }
+ if( PARAM_INTERLACED )
+ {
+ int i = MB_INTERLACED ? 2 : h->mb.i_mb_y&1;
+ h->mb.mv_min[1] = h->mb.mv_miny_row[i];
+ h->mb.mv_max[1] = h->mb.mv_maxy_row[i];
+ h->mb.mv_min_spel[1] = h->mb.mv_miny_spel_row[i];
+ h->mb.mv_max_spel[1] = h->mb.mv_maxy_spel_row[i];
+ h->mb.mv_min_fpel[1] = h->mb.mv_miny_fpel_row[i];
+ h->mb.mv_max_fpel[1] = h->mb.mv_maxy_fpel_row[i];
}
#undef CLIP_FMV
@@ -516,7 +564,7 @@
{
/* Always run in fast-intra mode for subme < 3 */
if( h->mb.i_subpel_refine > 2 &&
- ( IS_INTRA( h->mb.i_mb_type_left ) ||
+ ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
IS_INTRA( h->mb.i_mb_type_top ) ||
IS_INTRA( h->mb.i_mb_type_topleft ) ||
IS_INTRA( h->mb.i_mb_type_topright ) ||
@@ -1296,7 +1344,7 @@
/* early termination: if 16x16 chose ref 0, then evalute no refs older
* than those used by the neighbors */
if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
- h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
+ h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
{
i_maxref = 0;
CHECK_NEIGHBOUR( -8 - 1 );
@@ -1565,7 +1613,7 @@
const int or = 8*(i8x8&1) + 2*(i8x8&2)*i_stride;
const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
const int i_ref = a->l0.me8x8[i8x8].i_ref;
- const int mvy_offset = h->mb.b_interlaced & i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ const int mvy_offset = MB_INTERLACED & i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
x264_weight_t *weight = h->sh.weight[i_ref];
// FIXME weight can be done on 4x4 blocks even if mc is smaller
@@ -1722,8 +1770,8 @@
#define COST_BI_CHROMA( m0, m1, width, height ) \
{ \
- l0_mvy_offset = h->mb.b_interlaced & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
- l1_mvy_offset = h->mb.b_interlaced & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
+ l0_mvy_offset = MB_INTERLACED & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
+ l1_mvy_offset = MB_INTERLACED & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
h->mc.mc_chroma( pix[0], pix[1], 8, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
h->mc.mc_chroma( pix[2], pix[3], 8, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
h->mc.avg[i_pixel+3]( bi[0], 8, pix[0], 8, pix[2], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
@@ -1907,18 +1955,18 @@
ALIGNED_ARRAY_16( pixel, pixuv, [2],[8*FENC_STRIDE] );
ALIGNED_ARRAY_16( pixel, bi, [8*FENC_STRIDE] );
- if( h->mb.b_interlaced & a->l0.bi16x16.i_ref )
+ if( MB_INTERLACED & a->l0.bi16x16.i_ref )
{
- int l0_mvy_offset = h->mb.b_interlaced & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ int l0_mvy_offset = MB_INTERLACED & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
}
else
h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
- if( h->mb.b_interlaced & a->l1.bi16x16.i_ref )
+ if( MB_INTERLACED & a->l1.bi16x16.i_ref )
{
- int l1_mvy_offset = h->mb.b_interlaced & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ int l1_mvy_offset = MB_INTERLACED & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
}
@@ -2063,7 +2111,7 @@
{
x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
if( i_maxref[l] > 0 && lX->me16x16.i_ref == 0 &&
- h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
+ h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
{
i_maxref[l] = 0;
CHECK_NEIGHBOUR( -8 - 1 );
@@ -2809,15 +2857,19 @@
}
else
{
+ int skip_invalid = h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1];
+ /* If the current macroblock is off the frame, just skip it. */
+ if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height && !skip_invalid )
+ b_skip = 1;
/* Fast P_SKIP detection */
- if( h->param.analyse.b_fast_pskip )
+ else if( h->param.analyse.b_fast_pskip )
{
- if( h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
+ if( skip_invalid )
// FIXME don't need to check this if the reference frame is done
{}
else if( h->param.analyse.i_subpel_refine >= 3 )
analysis.b_try_skip = 1;
- else if( h->mb.i_mb_type_left == P_SKIP ||
+ else if( h->mb.i_mb_type_left[0] == P_SKIP ||
h->mb.i_mb_type_top == P_SKIP ||
h->mb.i_mb_type_topleft == P_SKIP ||
h->mb.i_mb_type_topright == P_SKIP )
@@ -3139,7 +3191,10 @@
{
if( !h->mb.b_direct_auto_write )
x264_mb_mc( h );
- if( analysis.i_mbrd )
+ /* If the current macroblock is off the frame, just skip it. */
+ if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height )
+ b_skip = 1;
+ else if( analysis.i_mbrd )
{
i_bskip_cost = ssd_mb( h );
/* 6 = minimum cavlc cost of a non-skipped MB */
@@ -3657,8 +3712,8 @@
int ref = h->mb.cache.ref[l][x264_scan8[0]];
if( ref < 0 )
continue;
- completed = h->fref[l][ ref >> h->mb.b_interlaced ]->orig->i_lines_completed;
- if( (h->mb.cache.mv[l][x264_scan8[15]][1] >> (2 - h->mb.b_interlaced)) + h->mb.i_mb_y*16 > completed )
+ completed = h->fref[l][ ref >> MB_INTERLACED ]->orig->i_lines_completed;
+ if( (h->mb.cache.mv[l][x264_scan8[15]][1] >> (2 - MB_INTERLACED)) + h->mb.i_mb_y*16 > completed )
{
x264_log( h, X264_LOG_WARNING, "internal error (MV out of thread range)\n");
x264_log( h, X264_LOG_DEBUG, "mb type: %d \n", h->mb.i_type);
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/analyse.h
^
|
@@ -27,7 +27,8 @@
#ifndef X264_ANALYSE_H
#define X264_ANALYSE_H
-int x264_analyse_init_costs( x264_t *h, int qp );
+float *x264_analyse_prepare_costs( x264_t *h );
+int x264_analyse_init_costs( x264_t *h, float *logs, int qp );
void x264_analyse_free_costs( x264_t *h );
void x264_analyse_weight_frame( x264_t *h, int end );
void x264_macroblock_analyse( x264_t *h );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/cabac.c
^
|
@@ -66,20 +66,36 @@
}
}
+#if !RDO_SKIP_BS
+static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
+{
+ int ctx = 0;
+ ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x;
+ ctx += (h->mb.i_mb_top_mbpair_xy >= 0
+ && h->mb.slice_table[h->mb.i_mb_top_mbpair_xy] == h->sh.i_first_mb
+ && h->mb.field[h->mb.i_mb_top_mbpair_xy]);
+
+ x264_cabac_encode_decision_noup( cb, 70 + ctx, MB_INTERLACED );
+ h->mb.field_decoding_flag = MB_INTERLACED;
+}
+#endif
+
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
const int i_mb_type = h->mb.i_type;
- if( h->sh.b_mbaff &&
+#if !RDO_SKIP_BS
+ if( SLICE_MBAFF &&
(!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
{
- x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
+ x264_cabac_field_decoding_flag( h, cb );
}
+#endif
if( h->sh.i_type == SLICE_TYPE_I )
{
int ctx = 0;
- if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != I_4x4 )
+ if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
ctx++;
if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
ctx++;
@@ -113,7 +129,7 @@
else //if( h->sh.i_type == SLICE_TYPE_B )
{
int ctx = 0;
- if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
+ if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
ctx++;
if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
ctx++;
@@ -198,7 +214,7 @@
int ctx = 0;
/* No need to test for I4x4 or I_16x16 as cache_save handle that */
- if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy] != 0 )
+ if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
ctx++;
if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
ctx++;
@@ -280,9 +296,9 @@
#if !RDO_SKIP_BS
void x264_cabac_mb_skip( x264_t *h, int b_skip )
{
- int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left ))
- + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
- + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
+ int ctx = h->mb.cache.i_neighbour_skip + 11;
+ if( h->sh.i_type != SLICE_TYPE_P )
+ ctx += 13;
x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
}
#endif
@@ -335,7 +351,7 @@
const int i8 = x264_scan8[idx];
const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
- int ctx = 0;
+ int ctx = 0;
if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] )
ctx++;
@@ -365,7 +381,7 @@
for( int i = 1; i < i_abs; i++ )
x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 );
x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 );
- x264_cabac_encode_bypass( cb, mvd < 0 );
+ x264_cabac_encode_bypass( cb, mvd >> 31 );
}
else
{
@@ -405,12 +421,12 @@
x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
}
- x264_cabac_encode_bypass( cb, mvd < 0 );
+ x264_cabac_encode_bypass( cb, mvd >> 31 );
}
#endif
- /* Since we don't need to keep track of MVDs larger than 33, just cap the value.
+ /* Since we don't need to keep track of MVDs larger than 66, just cap the value.
* This lets us store MVDs as 8-bit values instead of 16-bit. */
- return X264_MIN( i_abs, 33 );
+ return X264_MIN( i_abs, 66 );
}
static NOINLINE uint16_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
@@ -510,11 +526,13 @@
}
-static const uint16_t significant_coeff_flag_offset[2][6] = {
+static const uint16_t significant_coeff_flag_offset[2][6] =
+{
{ 105, 120, 134, 149, 152, 402 },
{ 277, 292, 306, 321, 324, 436 }
};
-static const uint16_t last_coeff_flag_offset[2][6] = {
+static const uint16_t last_coeff_flag_offset[2][6] =
+{
{ 166, 181, 195, 210, 213, 417 },
{ 338, 353, 367, 382, 385, 451 }
};
@@ -532,7 +550,8 @@
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
}};
-static const uint8_t last_coeff_flag_offset_8x8[63] = {
+static const uint8_t last_coeff_flag_offset_8x8[63] =
+{
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
@@ -556,78 +575,70 @@
#if !RDO_SKIP_BS
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
- const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][ctx_block_cat];
- const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][ctx_block_cat];
- const int i_ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
- const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
- int i_coeff_abs_m1[64];
- int i_coeff_sign[64];
- int i_coeff = 0;
- int i_last;
- int node_ctx = 0;
- int i = 0;
+ const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[MB_INTERLACED];
+ int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
+ int coeff_idx = -1, node_ctx = 0, last;
+ int coeffs[64];
- i_last = h->quantf.coeff_last[ctx_block_cat](l);
+ last = h->quantf.coeff_last[ctx_block_cat]( l );
#define WRITE_SIGMAP( l8x8 )\
- while(1)\
+ int i = 0;\
+ while( 1 )\
{\
if( l[i] )\
{\
- i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
- i_coeff_sign[i_coeff] = l[i] < 0;\
- i_coeff++;\
- x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? sig_offset[i] : i), 1 );\
- if( i == i_last )\
+ coeffs[++coeff_idx] = l[i];\
+ x264_cabac_encode_decision( cb, ctx_sig + (l8x8 ? sig_offset[i] : i), 1 );\
+ if( i == last )\
{\
- x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 1 );\
+ x264_cabac_encode_decision( cb, ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 1 );\
break;\
}\
else\
- x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 0 );\
+ x264_cabac_encode_decision( cb, ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), 0 );\
}\
else\
- x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? sig_offset[i] : i), 0 );\
+ x264_cabac_encode_decision( cb, ctx_sig + (l8x8 ? sig_offset[i] : i), 0 );\
i++;\
- if( i == i_count_m1 )\
+ if( i == count_m1 )\
{\
- i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
- i_coeff_sign[i_coeff] = l[i] < 0;\
- i_coeff++;\
+ coeffs[++coeff_idx] = l[i];\
break;\
}\
}
if( ctx_block_cat == DCT_LUMA_8x8 )
{
- const int i_count_m1 = 63;
+ int count_m1 = 63;
WRITE_SIGMAP( 1 )
}
else
{
- const int i_count_m1 = count_cat_m1[ctx_block_cat];
+ int count_m1 = count_cat_m1[ctx_block_cat];
WRITE_SIGMAP( 0 )
}
do
{
- int i_prefix, ctx;
- i_coeff--;
-
/* write coeff_abs - 1 */
- i_prefix = X264_MIN( i_coeff_abs_m1[i_coeff], 14 );
- ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level;
+ int coeff = coeffs[coeff_idx];
+ int abs_coeff = abs(coeff);
+ int coeff_sign = coeff >> 31;
+ int ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
- if( i_prefix )
+ if( abs_coeff > 1 )
{
x264_cabac_encode_decision( cb, ctx, 1 );
- ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
- for( i = 0; i < i_prefix - 1; i++ )
+ ctx = coeff_abs_levelgt1_ctx[node_ctx] + ctx_level;
+ for( int i = X264_MIN( abs_coeff, 15 ) - 2; i > 0; i-- )
x264_cabac_encode_decision( cb, ctx, 1 );
- if( i_prefix < 14 )
+ if( abs_coeff < 15 )
x264_cabac_encode_decision( cb, ctx, 0 );
else
- x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs_m1[i_coeff] - 14 );
+ x264_cabac_encode_ue_bypass( cb, 0, abs_coeff - 15 );
node_ctx = coeff_abs_level_transition[1][node_ctx];
}
@@ -637,8 +648,8 @@
node_ctx = coeff_abs_level_transition[0][node_ctx];
}
- x264_cabac_encode_bypass( cb, i_coeff_sign[i_coeff] );
- } while( i_coeff > 0 );
+ x264_cabac_encode_bypass( cb, coeff_sign );
+ } while( --coeff_idx >= 0 );
}
#define block_residual_write_cabac_8x8( h, cb, l ) block_residual_write_cabac( h, cb, DCT_LUMA_8x8, l )
@@ -650,37 +661,35 @@
* for this (~0.001db) and the speed boost (~30%) is worth it. */
static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8 )
{
- const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][ctx_block_cat];
- const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][ctx_block_cat];
- const int i_ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
- const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
- int i_last, i_coeff_abs, ctx, node_ctx;
-
- i_last = h->quantf.coeff_last[ctx_block_cat](l);
-
- i_coeff_abs = abs(l[i_last]);
- ctx = coeff_abs_level1_ctx[0] + i_ctx_level;
+ const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[MB_INTERLACED];
+ int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
+ int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
+ int last = h->quantf.coeff_last[ctx_block_cat]( l );
+ int coeff_abs = abs(l[last]);
+ int ctx = coeff_abs_level1_ctx[0] + ctx_level;
+ int node_ctx;
- if( i_last != (b_8x8 ? 63 : count_cat_m1[ctx_block_cat]) )
+ if( last != (b_8x8 ? 63 : count_cat_m1[ctx_block_cat]) )
{
- x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?sig_offset[i_last]:i_last), 1 );
- x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i_last]:i_last), 1 );
+ x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] : last), 1 );
+ x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? last_coeff_flag_offset_8x8[last] : last), 1 );
}
- if( i_coeff_abs > 1 )
+ if( coeff_abs > 1 )
{
x264_cabac_encode_decision( cb, ctx, 1 );
- ctx = coeff_abs_levelgt1_ctx[0] + i_ctx_level;
- if( i_coeff_abs < 15 )
+ ctx = coeff_abs_levelgt1_ctx[0] + ctx_level;
+ if( coeff_abs < 15 )
{
- cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]];
- cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]];
+ cb->f8_bits_encoded += cabac_size_unary[coeff_abs-1][cb->state[ctx]];
+ cb->state[ctx] = cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
}
else
{
cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
- x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 );
+ x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
}
node_ctx = coeff_abs_level_transition[1][0];
}
@@ -691,29 +700,29 @@
x264_cabac_encode_bypass( cb, 0 ); // sign
}
- for( int i = i_last-1 ; i >= 0; i-- )
+ for( int i = last-1 ; i >= 0; i-- )
{
if( l[i] )
{
- i_coeff_abs = abs(l[i]);
- x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?sig_offset[i]:i), 1 );
- x264_cabac_encode_decision( cb, i_ctx_last + (b_8x8?last_coeff_flag_offset_8x8[i]:i), 0 );
- ctx = coeff_abs_level1_ctx[node_ctx] + i_ctx_level;
+ coeff_abs = abs(l[i]);
+ x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] : i), 1 );
+ x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? last_coeff_flag_offset_8x8[i] : i), 0 );
+ ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
- if( i_coeff_abs > 1 )
+ if( coeff_abs > 1 )
{
x264_cabac_encode_decision( cb, ctx, 1 );
- ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
- if( i_coeff_abs < 15 )
+ ctx = coeff_abs_levelgt1_ctx[node_ctx] + ctx_level;
+ if( coeff_abs < 15 )
{
- cb->f8_bits_encoded += cabac_size_unary[i_coeff_abs-1][cb->state[ctx]];
- cb->state[ctx] = cabac_transition_unary[i_coeff_abs-1][cb->state[ctx]];
+ cb->f8_bits_encoded += cabac_size_unary[coeff_abs-1][cb->state[ctx]];
+ cb->state[ctx] = cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
}
else
{
cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
- x264_cabac_encode_ue_bypass( cb, 0, i_coeff_abs - 15 );
+ x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
}
node_ctx = coeff_abs_level_transition[1][node_ctx];
}
@@ -725,7 +734,7 @@
}
}
else
- x264_cabac_encode_decision( cb, i_ctx_sig + (b_8x8?sig_offset[i]:i), 0 );
+ x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] : i), 0 );
}
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/cavlc.c
^
|
@@ -100,9 +100,8 @@
/* Weight highly against overflows. */
s->i_bits_encoded += 2000;
#else
- x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d is only allowed in High Profile\n", i_level_code );
- /* clip level, preserving sign */
- i_level_code = (1<<12) - 2 + (i_level_code & 1);
+ /* We've had an overflow; note it down and re-encode the MB later. */
+ h->mb.b_overflow = 1;
#endif
}
}
@@ -296,10 +295,10 @@
int i_mb_pos_tex;
#endif
- if( h->sh.b_mbaff
+ if( SLICE_MBAFF
&& (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
{
- bs_write1( s, h->mb.b_interlaced );
+ bs_write1( s, MB_INTERLACED );
}
#if !RDO_SKIP_BS
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/encoder.c
^
|
@@ -25,8 +25,6 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#include <math.h>
-
#include "common/common.h"
#include "set.h"
@@ -104,7 +102,7 @@
sh->i_frame_num = i_frame;
- sh->b_mbaff = h->param.b_interlaced;
+ sh->b_mbaff = PARAM_INTERLACED;
sh->b_field_pic = 0; /* no field support for now */
sh->b_bottom_field = 0; /* not yet used */
@@ -183,8 +181,10 @@
{
if( sh->b_mbaff )
{
- assert( sh->i_first_mb % (2*sh->sps->i_mb_width) == 0 );
- bs_write_ue( s, sh->i_first_mb >> 1 );
+ int first_x = sh->i_first_mb % sh->sps->i_mb_width;
+ int first_y = sh->i_first_mb / sh->sps->i_mb_width;
+ assert( (first_y&1) == 0 );
+ bs_write_ue( s, (2*first_x + sh->sps->i_mb_width*(first_y&~1) + (first_y&1)) >> 1 );
}
else
bs_write_ue( s, sh->i_first_mb );
@@ -335,8 +335,9 @@
static int x264_bitstream_check_buffer( x264_t *h )
{
uint8_t *bs_bak = h->out.p_bitstream;
- if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < 2500)) ||
- (h->out.bs.p_end - h->out.bs.p < 2500) )
+ int max_mb_size = 2500 << SLICE_MBAFF;
+ if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_mb_size)) ||
+ (h->out.bs.p_end - h->out.bs.p < max_mb_size) )
{
h->out.i_bitstream += 100000;
CHECKED_MALLOC( h->out.p_bitstream, h->out.i_bitstream );
@@ -383,15 +384,15 @@
*
****************************************************************************/
-static int x264_validate_parameters( x264_t *h )
+static int x264_validate_parameters( x264_t *h, int b_open )
{
#if HAVE_MMX
#ifdef __SSE__
- if( !(x264_cpu_detect() & X264_CPU_SSE) )
+ if( b_open && !(x264_cpu_detect() & X264_CPU_SSE) )
{
x264_log( h, X264_LOG_ERROR, "your cpu does not support SSE1, but x264 was compiled with asm support\n");
#else
- if( !(x264_cpu_detect() & X264_CPU_MMXEXT) )
+ if( b_open && !(x264_cpu_detect() & X264_CPU_MMXEXT) )
{
x264_log( h, X264_LOG_ERROR, "your cpu does not support MMXEXT, but x264 was compiled with asm support\n");
#endif
@@ -419,6 +420,16 @@
return -1;
}
+#if HAVE_INTERLACED
+ h->param.b_interlaced = !!PARAM_INTERLACED;
+#else
+ if( h->param.b_interlaced )
+ {
+ x264_log( h, X264_LOG_ERROR, "not compiled with interlaced support\n" );
+ return -1;
+ }
+#endif
+
if( (h->param.crop_rect.i_left + h->param.crop_rect.i_right ) >= h->param.i_width ||
(h->param.crop_rect.i_top + h->param.crop_rect.i_bottom) >= h->param.i_height )
{
@@ -457,23 +468,10 @@
h->param.analyse.i_weighted_pred = 0;
}
- if( h->param.b_interlaced )
- {
- if( h->param.analyse.i_me_method >= X264_ME_ESA )
- {
- x264_log( h, X264_LOG_WARNING, "interlace + me=esa is not implemented\n" );
- h->param.analyse.i_me_method = X264_ME_UMH;
- }
- if( h->param.analyse.i_weighted_pred > 0 )
- {
- x264_log( h, X264_LOG_WARNING, "interlace + weightp is not implemented\n" );
- h->param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- }
- }
-
h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
/* Detect default ffmpeg settings and terminate with an error. */
+ if( b_open )
{
int score = 0;
score += h->param.analyse.i_me_range == 0;
@@ -502,7 +500,11 @@
return -1;
}
h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
+ h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
+ h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 10 );
+ h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f );
+ h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f );
if( h->param.rc.i_rc_method == X264_RC_CRF )
{
h->param.rc.i_qp_constant = h->param.rc.f_rf_constant + QP_BD_OFFSET;
@@ -538,9 +540,15 @@
h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
h->param.rc.i_aq_mode = 0;
h->param.rc.b_mb_tree = 0;
+ h->param.rc.i_bitrate = 0;
}
h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
+ h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 0, QP_MAX );
+ h->param.rc.i_bitrate = x264_clip3( h->param.rc.i_bitrate, 0, 2000000 );
+ h->param.rc.i_vbv_buffer_size = x264_clip3( h->param.rc.i_vbv_buffer_size, 0, 2000000 );
+ h->param.rc.i_vbv_max_bitrate = x264_clip3( h->param.rc.i_vbv_max_bitrate, 0, 2000000 );
+ h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init, 0, 2000000 );
if( h->param.rc.i_vbv_buffer_size )
{
if( h->param.rc.i_rc_method == X264_RC_CQP )
@@ -575,49 +583,58 @@
h->param.rc.i_vbv_max_bitrate = 0;
}
- if( h->param.b_interlaced && h->param.i_slice_max_size )
- {
- x264_log( h, X264_LOG_WARNING, "interlaced + slice-max-size is not implemented\n" );
- h->param.i_slice_max_size = 0;
- }
- if( h->param.b_interlaced && h->param.i_slice_max_mbs )
- {
- x264_log( h, X264_LOG_WARNING, "interlaced + slice-max-mbs is not implemented\n" );
- h->param.i_slice_max_mbs = 0;
- }
- int max_slices = (h->param.i_height+((16<<h->param.b_interlaced)-1))/(16<<h->param.b_interlaced);
+ h->param.i_slice_max_size = X264_MAX( h->param.i_slice_max_size, 0 );
+ h->param.i_slice_max_mbs = X264_MAX( h->param.i_slice_max_mbs, 0 );
+
+ int max_slices = (h->param.i_height+((16<<PARAM_INTERLACED)-1))/(16<<PARAM_INTERLACED);
if( h->param.b_sliced_threads )
h->param.i_slice_count = x264_clip3( h->param.i_threads, 0, max_slices );
else
{
h->param.i_slice_count = x264_clip3( h->param.i_slice_count, 0, max_slices );
- h->param.i_slice_max_size = X264_MAX( h->param.i_slice_max_size, 0 );
- h->param.i_slice_max_mbs = X264_MAX( h->param.i_slice_max_mbs, 0 );
if( h->param.i_slice_max_mbs || h->param.i_slice_max_size )
h->param.i_slice_count = 0;
}
+ if( h->param.b_bluray_compat )
+ {
+ h->param.i_bframe_pyramid = X264_MIN( X264_B_PYRAMID_STRICT, h->param.i_bframe_pyramid );
+ h->param.i_bframe = X264_MIN( h->param.i_bframe, 3 );
+ h->param.b_aud = 1;
+ h->param.i_nal_hrd = X264_MAX( h->param.i_nal_hrd, X264_NAL_HRD_VBR );
+ h->param.i_slice_max_size = 0;
+ h->param.i_slice_max_mbs = 0;
+ h->param.b_intra_refresh = 0;
+ h->param.i_frame_reference = X264_MIN( h->param.i_frame_reference, 6 );
+ h->param.i_dpb_size = X264_MIN( h->param.i_dpb_size, 6 );
+ /* Due to the proliferation of broken players that don't handle dupes properly. */
+ h->param.analyse.i_weighted_pred = X264_MIN( h->param.analyse.i_weighted_pred, X264_WEIGHTP_SIMPLE );
+ if( h->param.b_fake_interlaced )
+ h->param.b_pic_struct = 1;
+ }
+
h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
if( h->param.i_scenecut_threshold < 0 )
h->param.i_scenecut_threshold = 0;
+ h->param.analyse.i_direct_mv_pred = x264_clip3( h->param.analyse.i_direct_mv_pred, X264_DIRECT_PRED_NONE, X264_DIRECT_PRED_AUTO );
if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
{
x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
}
h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
- h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
if( h->param.i_bframe <= 1 )
h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
h->param.i_bframe_pyramid = x264_clip3( h->param.i_bframe_pyramid, X264_B_PYRAMID_NONE, X264_B_PYRAMID_NORMAL );
+ h->param.i_bframe_adaptive = x264_clip3( h->param.i_bframe_adaptive, X264_B_ADAPT_NONE, X264_B_ADAPT_TRELLIS );
if( !h->param.i_bframe )
{
h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
h->param.analyse.i_direct_mv_pred = 0;
h->param.analyse.b_weighted_bipred = 0;
- h->param.i_open_gop = X264_OPEN_GOP_NONE;
+ h->param.b_open_gop = 0;
}
if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
{
@@ -630,10 +647,10 @@
h->param.i_frame_reference = 1;
h->param.i_dpb_size = 1;
}
- if( h->param.b_intra_refresh && h->param.i_open_gop )
+ if( h->param.b_intra_refresh && h->param.b_open_gop )
{
x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
- h->param.i_open_gop = X264_OPEN_GOP_NONE;
+ h->param.b_open_gop = 0;
}
float fps = h->param.i_fps_num > 0 && h->param.i_fps_den > 0 ? (float) h->param.i_fps_num / h->param.i_fps_den : 25.0;
if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
@@ -686,14 +703,12 @@
if( h->param.analyse.i_me_method < X264_ME_DIA ||
h->param.analyse.i_me_method > X264_ME_TESA )
h->param.analyse.i_me_method = X264_ME_HEX;
- if( h->param.analyse.i_me_range < 4 )
- h->param.analyse.i_me_range = 4;
+ h->param.analyse.i_me_range = x264_clip3( h->param.analyse.i_me_range, 4, 1024 );
if( h->param.analyse.i_me_range > 16 && h->param.analyse.i_me_method <= X264_ME_HEX )
h->param.analyse.i_me_range = 16;
if( h->param.analyse.i_me_method == X264_ME_TESA &&
(h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1) )
h->param.analyse.i_me_method = X264_ME_ESA;
- h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 10 );
h->param.analyse.b_mixed_references = h->param.analyse.b_mixed_references && h->param.i_frame_reference > 1;
h->param.analyse.inter &= X264_ANALYSE_PSUB16x16|X264_ANALYSE_PSUB8x8|X264_ANALYSE_BSUB16x16|
X264_ANALYSE_I4x4|X264_ANALYSE_I8x8;
@@ -707,33 +722,57 @@
}
h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
+ h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
+ h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
+ if( h->param.rc.f_aq_strength == 0 )
+ h->param.rc.i_aq_mode = 0;
+
+ if( h->param.i_log_level < X264_LOG_INFO )
+ {
+ h->param.analyse.b_psnr = 0;
+ h->param.analyse.b_ssim = 0;
+ }
+ /* Warn users trying to measure PSNR/SSIM with psy opts on. */
+ if( b_open && (h->param.analyse.b_psnr || h->param.analyse.b_ssim) )
+ {
+ char *s = NULL;
+
+ if( h->param.analyse.b_psy )
+ {
+ s = h->param.analyse.b_psnr ? "psnr" : "ssim";
+ x264_log( h, X264_LOG_WARNING, "--%s used with psy on: results will be invalid!\n", s );
+ }
+ else if( !h->param.rc.i_aq_mode && h->param.analyse.b_ssim )
+ {
+ x264_log( h, X264_LOG_WARNING, "--ssim used with AQ off: results will be invalid!\n" );
+ s = "ssim";
+ }
+ else if( h->param.rc.i_aq_mode && h->param.analyse.b_psnr )
+ {
+ x264_log( h, X264_LOG_WARNING, "--psnr used with AQ on: results will be invalid!\n" );
+ s = "psnr";
+ }
+ if( s )
+ x264_log( h, X264_LOG_WARNING, "--tune %s should be used if attempting to benchmark %s!\n", s, s );
+ }
+
if( !h->param.analyse.b_psy )
{
h->param.analyse.f_psy_rd = 0;
h->param.analyse.f_psy_trellis = 0;
}
- if( !h->param.analyse.i_trellis )
- h->param.analyse.f_psy_trellis = 0;
h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
h->param.analyse.f_psy_trellis = x264_clip3f( h->param.analyse.f_psy_trellis, 0, 10 );
- if( h->param.analyse.i_subpel_refine < 6 )
- h->param.analyse.f_psy_rd = 0;
- h->mb.i_psy_rd = FIX8( h->param.analyse.f_psy_rd );
+ h->mb.i_psy_rd = h->param.analyse.i_subpel_refine >= 6 ? FIX8( h->param.analyse.f_psy_rd ) : 0;
+ h->mb.i_psy_trellis = h->param.analyse.i_trellis ? FIX8( h->param.analyse.f_psy_trellis / 4 ) : 0;
/* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
/* so we lower the chroma QP offset to compensate */
- /* This can be triggered repeatedly on multiple calls to parameter_validate, but since encoding
- * uses the pps chroma qp offset not the param chroma qp offset, this is not a problem. */
- if( h->mb.i_psy_rd )
+ if( b_open && h->mb.i_psy_rd )
h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
- h->mb.i_psy_trellis = FIX8( h->param.analyse.f_psy_trellis / 4 );
/* Psy trellis has a similar effect. */
- if( h->mb.i_psy_trellis )
+ if( b_open && h->mb.i_psy_trellis )
h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
- h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
- h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
- if( h->param.rc.f_aq_strength == 0 )
- h->param.rc.i_aq_mode = 0;
/* MB-tree requires AQ to be on, even if the strength is zero. */
if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
{
@@ -768,12 +807,27 @@
}
}
if( h->param.analyse.i_mv_range <= 0 )
- h->param.analyse.i_mv_range = l->mv_range >> h->param.b_interlaced;
+ h->param.analyse.i_mv_range = l->mv_range >> PARAM_INTERLACED;
else
- h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 512 >> h->param.b_interlaced);
+ h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 512 >> PARAM_INTERLACED);
}
h->param.analyse.i_weighted_pred = x264_clip3( h->param.analyse.i_weighted_pred, X264_WEIGHTP_NONE, X264_WEIGHTP_SMART );
+
+ if( PARAM_INTERLACED )
+ {
+ if( h->param.analyse.i_me_method >= X264_ME_ESA )
+ {
+ x264_log( h, X264_LOG_WARNING, "interlace + me=esa is not implemented\n" );
+ h->param.analyse.i_me_method = X264_ME_UMH;
+ }
+ if( h->param.analyse.i_weighted_pred > 0 )
+ {
+ x264_log( h, X264_LOG_WARNING, "interlace + weightp is not implemented\n" );
+ h->param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
+ }
+ }
+
if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy )
h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;
@@ -800,6 +854,8 @@
h->param.analyse.i_mv_range_thread = r2;
}
+ if( h->param.rc.f_rate_tolerance < 0 )
+ h->param.rc.f_rate_tolerance = 0;
if( h->param.rc.f_qblur < 0 )
h->param.rc.f_qblur = 0;
if( h->param.rc.f_complexity_blur < 0 )
@@ -807,15 +863,11 @@
h->param.i_sps_id &= 31;
- if( h->param.i_log_level < X264_LOG_INFO )
- {
- h->param.analyse.b_psnr = 0;
- h->param.analyse.b_ssim = 0;
- }
-
- if( h->param.b_interlaced )
+ if( PARAM_INTERLACED )
h->param.b_pic_struct = 1;
+ h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR );
+
if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size )
{
x264_log( h, X264_LOG_WARNING, "NAL HRD parameters require VBV parameters\n" );
@@ -843,8 +895,12 @@
BOOLIFY( b_repeat_headers );
BOOLIFY( b_annexb );
BOOLIFY( b_vfr_input );
+ BOOLIFY( b_pulldown );
+ BOOLIFY( b_tff );
BOOLIFY( b_pic_struct );
BOOLIFY( b_fake_interlaced );
+ BOOLIFY( b_open_gop );
+ BOOLIFY( b_bluray_compat );
BOOLIFY( analyse.b_transform_8x8 );
BOOLIFY( analyse.b_weighted_bipred );
BOOLIFY( analyse.b_chroma_me );
@@ -937,7 +993,7 @@
goto fail;
}
- if( x264_validate_parameters( h ) < 0 )
+ if( x264_validate_parameters( h, 1 ) < 0 )
goto fail;
if( h->param.psz_cqm_file )
@@ -981,6 +1037,10 @@
h->mb.i_mb_width = h->sps->i_mb_width;
h->mb.i_mb_height = h->sps->i_mb_height;
h->mb.i_mb_count = h->mb.i_mb_width * h->mb.i_mb_height;
+ /* Adaptive MBAFF and subme 0 are not supported as we require halving motion
+ * vectors during prediction, resulting in hpel mvs.
+ * The chosen solution is to make MBAFF non-adaptive in this case. */
+ h->mb.b_adaptive_mbaff = PARAM_INTERLACED && h->param.analyse.i_subpel_refine;
/* Init frames. */
if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS && !h->param.rc.b_stat_read )
@@ -1032,14 +1092,17 @@
x264_predict_8x8c_init( h->param.cpu, h->predict_8x8c );
x264_predict_8x8_init( h->param.cpu, h->predict_8x8, &h->predict_8x8_filter );
x264_predict_4x4_init( h->param.cpu, h->predict_4x4 );
- if( !h->param.b_cabac )
- x264_init_vlc_tables();
+ if( h->param.b_cabac )
+ x264_cabac_init();
+ else
+ x264_cavlc_init();
x264_pixel_init( h->param.cpu, &h->pixf );
x264_dct_init( h->param.cpu, &h->dctf );
- x264_zigzag_init( h->param.cpu, &h->zigzagf, h->param.b_interlaced );
+ x264_zigzag_init( h->param.cpu, &h->zigzagf_progressive, &h->zigzagf_interlaced );
+ memcpy( &h->zigzagf, PARAM_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
x264_mc_init( h->param.cpu, &h->mc );
x264_quant_init( h, h->param.cpu, &h->quantf );
- x264_deblock_init( h->param.cpu, &h->loopf );
+ x264_deblock_init( h->param.cpu, &h->loopf, PARAM_INTERLACED );
x264_bitstream_init( h->param.cpu, &h->bsf );
x264_dct_init_weights();
@@ -1065,11 +1128,15 @@
p += sprintf( p, " none!" );
x264_log( h, X264_LOG_INFO, "%s\n", buf );
+ float *logs = x264_analyse_prepare_costs( h );
+ if( !logs )
+ goto fail;
for( qp = X264_MIN( h->param.rc.i_qp_min, QP_MAX_SPEC ); qp <= h->param.rc.i_qp_max; qp++ )
- if( x264_analyse_init_costs( h, qp ) )
+ if( x264_analyse_init_costs( h, logs, qp ) )
goto fail;
- if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
+ if( x264_analyse_init_costs( h, logs, X264_LOOKAHEAD_QP ) )
goto fail;
+ x264_free( logs );
static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
/* Checks for known miscompilation issues. */
@@ -1096,8 +1163,8 @@
* ( h->param.rc.i_rc_method == X264_RC_ABR ? pow( 0.95, h->param.rc.i_qp_min )
: pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor )));
- CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
+ CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
if( h->param.i_threads > 1 &&
x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
@@ -1248,27 +1315,22 @@
if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 &&
param->rc.i_vbv_max_bitrate > 0 && param->rc.i_vbv_buffer_size > 0 )
{
+ rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
+ rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
+ rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
COPY( rc.i_vbv_max_bitrate );
COPY( rc.i_vbv_buffer_size );
COPY( rc.i_bitrate );
- rc_reconfig = 1;
}
- if( h->param.rc.f_rf_constant != param->rc.f_rf_constant )
- {
- COPY( rc.f_rf_constant );
- rc_reconfig = 1;
- }
- if( h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max )
- {
- COPY( rc.f_rf_constant_max );
- rc_reconfig = 1;
- }
-
+ rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
+ rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
+ COPY( rc.f_rf_constant );
+ COPY( rc.f_rf_constant_max );
#undef COPY
mbcmp_init( h );
- int ret = x264_validate_parameters( h );
+ int ret = x264_validate_parameters( h, 0 );
/* Supported reconfiguration options (1-pass only):
* vbv-maxrate
@@ -1347,9 +1409,11 @@
nal_size += h->out.nal[i].i_payload;
/* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
- if( h->nal_buffer_size < nal_size * 3/2 + h->out.i_nal * 4 )
+ int necessary_size = nal_size * 3/2 + h->out.i_nal * 4;
+ if( h->nal_buffer_size < necessary_size )
{
- uint8_t *buf = x264_malloc( nal_size * 2 + h->out.i_nal * 4 );
+ h->nal_buffer_size = necessary_size * 2;
+ uint8_t *buf = x264_malloc( h->nal_buffer_size );
if( !buf )
return -1;
if( previous_nal_size )
@@ -1404,6 +1468,8 @@
return -1;
frame_size = x264_encoder_encapsulate_nals( h, 0 );
+ if( frame_size < 0 )
+ return -1;
/* now set output*/
*pi_nal = h->out.i_nal;
@@ -1489,7 +1555,7 @@
// and duplicates of that frame.
h->fenc->i_lines_weighted = 0;
- for( int i_ref = 0; i_ref < (h->i_ref[0] << h->sh.b_mbaff); i_ref++ )
+ for( int i_ref = 0; i_ref < (h->i_ref[0] << SLICE_MBAFF); i_ref++ )
for( int i = 0; i < 3; i++ )
h->sh.weight[i_ref][i].weightfn = NULL;
@@ -1497,7 +1563,7 @@
if( h->sh.i_type != SLICE_TYPE_P || h->param.analyse.i_weighted_pred <= 0 )
return;
- int i_padv = PADV << h->param.b_interlaced;
+ int i_padv = PADV << PARAM_INTERLACED;
int denom = -1;
int weightplane[2] = { 0, 0 };
int buffer_next = 0;
@@ -1628,6 +1694,10 @@
h->i_ref[0] = X264_MIN( h->i_ref[0], h->frames.i_max_ref0 );
h->i_ref[0] = X264_MIN( h->i_ref[0], h->param.i_frame_reference ); // if reconfig() has lowered the limit
+ /* For Blu-ray compliance, don't reference frames outside of the minigop. */
+ if( IS_X264_TYPE_B( h->fenc->i_type ) && h->param.b_bluray_compat )
+ h->i_ref[0] = X264_MIN( h->i_ref[0], IS_X264_TYPE_B( h->fref[0][0]->i_type ) + 1 );
+
/* add duplicates */
if( h->fenc->i_type == X264_TYPE_P )
{
@@ -1676,24 +1746,37 @@
int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
int b_end = mb_y == h->i_threadslice_end;
int b_measure_quality = 1;
- int min_y = mb_y - (1 << h->sh.b_mbaff);
+ int min_y = mb_y - (1 << SLICE_MBAFF);
int b_start = min_y == h->i_threadslice_start;
- int max_y = b_end ? h->i_threadslice_end : mb_y;
+ /* Even in interlaced mode, deblocking never modifies more than 4 pixels
+ * above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */
+ int minpix_y = min_y*16 - 4 * !b_start;
+ int maxpix_y = mb_y*16 - 4 * !b_end;
b_deblock &= b_hpel || h->param.psz_dump_yuv;
if( h->param.b_sliced_threads && b_start && min_y && !b_inloop )
{
b_deblock = 0; /* We already deblocked on the inloop pass. */
b_measure_quality = 0; /* We already measured quality on the inloop pass. */
}
- if( mb_y & h->sh.b_mbaff )
+ if( mb_y & SLICE_MBAFF )
return;
if( min_y < h->i_threadslice_start )
return;
if( b_deblock )
- for( int y = min_y; y < max_y; y += (1 << h->sh.b_mbaff) )
+ for( int y = min_y; y < mb_y; y += (1 << SLICE_MBAFF) )
x264_frame_deblock_row( h, y );
+ /* FIXME: Prediction requires different borders for interlaced/progressive mc,
+ * but the actual image data is equivalent. For now, maintain this
+ * consistency by copying deblocked pixels between planes. */
+ if( PARAM_INTERLACED )
+ for( int p = 0; p < 2; p++ )
+ for( int i = minpix_y>>p; i < maxpix_y>>p; i++ )
+ memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p],
+ h->fdec->plane[p] + i*h->fdec->i_stride[p],
+ h->mb.i_mb_width*16*sizeof(pixel) );
+
if( b_hpel )
{
int end = mb_y == h->mb.i_mb_height;
@@ -1705,25 +1788,30 @@
}
}
- if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
- x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );
+ if( SLICE_MBAFF )
+ for( int i = 0; i < 2; i++ )
+ {
+ XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] );
+ XCHG( pixel *, h->intra_border_backup[1][i], h->intra_border_backup[4][i] );
+ }
- min_y = min_y*16 - 8 * !b_start;
- max_y = b_end ? X264_MIN( h->i_threadslice_end*16 , h->param.i_height ) : mb_y*16 - 8;
+ if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
+ x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << SLICE_MBAFF)) );
if( b_measure_quality )
{
+ maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
if( h->param.analyse.b_psnr )
{
uint64_t ssd_y = x264_pixel_ssd_wxh( &h->pixf,
- h->fdec->plane[0] + min_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
- h->fenc->plane[0] + min_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
- h->param.i_width, max_y-min_y );
+ h->fdec->plane[0] + minpix_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
+ h->fenc->plane[0] + minpix_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
+ h->param.i_width, maxpix_y-minpix_y );
uint64_t ssd_u, ssd_v;
x264_pixel_ssd_nv12( &h->pixf,
- h->fdec->plane[1] + (min_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
- h->fenc->plane[1] + (min_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
- h->param.i_width>>1, (max_y-min_y)>>1, &ssd_u, &ssd_v );
+ h->fdec->plane[1] + (minpix_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
+ h->fenc->plane[1] + (minpix_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
+ h->param.i_width>>1, (maxpix_y-minpix_y)>>1, &ssd_u, &ssd_v );
h->stat.frame.i_ssd[0] += ssd_y;
h->stat.frame.i_ssd[1] += ssd_u;
h->stat.frame.i_ssd[2] += ssd_v;
@@ -1734,12 +1822,12 @@
x264_emms();
/* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
* and overlap by 4 */
- min_y += b_start ? 2 : -6;
+ minpix_y += b_start ? 2 : -6;
h->stat.frame.f_ssim +=
x264_pixel_ssim_wxh( &h->pixf,
- h->fdec->plane[0] + 2+min_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
- h->fenc->plane[0] + 2+min_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
- h->param.i_width-2, max_y-min_y, h->scratch_buffer );
+ h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
+ h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
+ h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer );
}
}
}
@@ -1842,12 +1930,18 @@
}
}
+ if( h->fenc->i_type == X264_TYPE_BREF && h->param.b_bluray_compat && h->sh.i_mmco_command_count )
+ {
+ h->b_sh_backup = 1;
+ h->sh_backup = h->sh;
+ }
+
h->fdec->i_frame_num = h->sh.i_frame_num;
if( h->sps->i_poc_type == 0 )
{
h->sh.i_poc = h->fdec->i_poc;
- if( h->param.b_interlaced )
+ if( PARAM_INTERLACED )
{
h->sh.i_delta_poc_bottom = h->param.b_tff ? 1 : -1;
h->sh.i_poc += h->sh.i_delta_poc_bottom == -1;
@@ -1885,6 +1979,7 @@
* other inaccuracies. */
int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
+ int back_up_bitstream = slice_max_size || (!h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH);
int starting_bits = bs_pos(&h->out.bs);
int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
int b_hpel = h->fdec->b_kept_as_ref;
@@ -1923,53 +2018,78 @@
last_emu_check = h->out.bs.p;
h->mb.i_last_qp = h->sh.i_qp;
h->mb.i_last_dqp = 0;
+ h->mb.field_decoding_flag = 0;
i_mb_y = h->sh.i_first_mb / h->mb.i_mb_width;
i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
i_skip = 0;
- while( (mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width) <= h->sh.i_last_mb )
+ while( 1 )
{
+ mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
- if( x264_bitstream_check_buffer( h ) )
- return -1;
-
- if( slice_max_size )
+ if( !(i_mb_y & SLICE_MBAFF) )
{
- mv_bits_bak = h->stat.frame.i_mv_bits;
- tex_bits_bak = h->stat.frame.i_tex_bits;
- /* We don't need the contexts because flushing the CABAC encoder has no context
- * dependency and macroblocks are only re-encoded in the case where a slice is
- * ended (and thus the content of all contexts are thrown away). */
- if( h->param.b_cabac )
- {
- memcpy( &cabac_bak, &h->cabac, offsetof(x264_cabac_t, f8_bits_encoded) );
- /* x264's CABAC writer modifies the previous byte during carry, so it has to be
- * backed up. */
- cabac_prevbyte_bak = h->cabac.p[-1];
- }
- else
+ if( x264_bitstream_check_buffer( h ) )
+ return -1;
+
+ if( back_up_bitstream )
{
- bs_bak = h->out.bs;
- i_skip_bak = i_skip;
+ mv_bits_bak = h->stat.frame.i_mv_bits;
+ tex_bits_bak = h->stat.frame.i_tex_bits;
+ /* We don't need the contexts because flushing the CABAC encoder has no context
+ * dependency and macroblocks are only re-encoded in the case where a slice is
+ * ended (and thus the content of all contexts are thrown away). */
+ if( h->param.b_cabac )
+ {
+ memcpy( &cabac_bak, &h->cabac, offsetof(x264_cabac_t, f8_bits_encoded) );
+ /* x264's CABAC writer modifies the previous byte during carry, so it has to be
+ * backed up. */
+ cabac_prevbyte_bak = h->cabac.p[-1];
+ }
+ else
+ {
+ bs_bak = h->out.bs;
+ i_skip_bak = i_skip;
+ }
}
}
if( i_mb_x == 0 && !h->mb.b_reencode_mb )
x264_fdec_filter_row( h, i_mb_y, 1 );
+ if( PARAM_INTERLACED )
+ {
+ if( h->mb.b_adaptive_mbaff )
+ {
+ if( !(i_mb_y&1) )
+ {
+ /* FIXME: VSAD is fast but fairly poor at choosing the best interlace type. */
+ h->mb.b_interlaced = x264_field_vsad( h, i_mb_x, i_mb_y );
+ memcpy( &h->zigzagf, MB_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
+ if( !MB_INTERLACED && (i_mb_y+2) == h->mb.i_mb_height )
+ x264_expand_border_mbpair( h, i_mb_x, i_mb_y );
+ }
+ }
+ h->mb.field[mb_xy] = MB_INTERLACED;
+ }
+
/* load cache */
- x264_macroblock_cache_load( h, i_mb_x, i_mb_y );
+ if( SLICE_MBAFF )
+ x264_macroblock_cache_load_interlaced( h, i_mb_x, i_mb_y );
+ else
+ x264_macroblock_cache_load_progressive( h, i_mb_x, i_mb_y );
x264_macroblock_analyse( h );
/* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
+reencode:
x264_macroblock_encode( h );
if( h->param.b_cabac )
{
- if( mb_xy > h->sh.i_first_mb && !(h->sh.b_mbaff && (i_mb_y&1)) )
+ if( mb_xy > h->sh.i_first_mb && !(SLICE_MBAFF && (i_mb_y&1)) )
x264_cabac_encode_terminal( &h->cabac );
if( IS_SKIP( h->mb.i_type ) )
@@ -1993,6 +2113,19 @@
i_skip = 0;
}
x264_macroblock_write_cavlc( h );
+ /* If there was a CAVLC level code overflow, try again at a higher QP. */
+ if( h->mb.b_overflow )
+ {
+ h->mb.i_chroma_qp = h->chroma_qp_table[++h->mb.i_qp];
+ h->mb.i_skip_intra = 0;
+ h->mb.b_skip_mc = 0;
+ h->mb.b_overflow = 0;
+ h->out.bs = bs_bak;
+ i_skip = i_skip_bak;
+ h->stat.frame.i_mv_bits = mv_bits_bak;
+ h->stat.frame.i_tex_bits = tex_bits_bak;
+ goto reencode;
+ }
}
}
@@ -2030,7 +2163,16 @@
i_skip = i_skip_bak;
}
h->mb.b_reencode_mb = 1;
- h->sh.i_last_mb = mb_xy-1;
+ if( SLICE_MBAFF )
+ {
+ // set to bottom of previous mbpair
+ if( i_mb_x )
+ h->sh.i_last_mb = mb_xy-1+h->mb.i_mb_stride*(!(i_mb_y&1));
+ else
+ h->sh.i_last_mb = (i_mb_y-2+!(i_mb_y&1))*h->mb.i_mb_stride + h->mb.i_mb_width - 1;
+ }
+ else
+ h->sh.i_last_mb = mb_xy-1;
break;
}
else
@@ -2055,9 +2197,10 @@
h->stat.frame.i_mb_count[h->mb.i_type]++;
int b_intra = IS_INTRA( h->mb.i_type );
+ int b_skip = IS_SKIP( h->mb.i_type );
if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
{
- if( !b_intra && !IS_SKIP( h->mb.i_type ) && !IS_DIRECT( h->mb.i_type ) )
+ if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
{
if( h->mb.i_partition != D_8x8 )
h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
@@ -2102,24 +2245,19 @@
h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
h->stat.frame.i_mb_pred_mode[3][x264_mb_pred_mode8x8c_fix[h->mb.i_chroma_pred_mode]]++;
}
+ h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
}
/* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
if( b_deblock )
- {
- int mvy_limit = 4 >> h->sh.b_mbaff;
- uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
- x264_macroblock_cache_load_deblock( h );
- if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
- memset( bs, 3, 2*4*4*sizeof(uint8_t) );
- else
- h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
- }
+ x264_macroblock_deblock_strength( h );
x264_ratecontrol_mb( h, mb_size );
- if( h->sh.b_mbaff )
+ if( mb_xy == h->sh.i_last_mb )
+ break;
+
+ if( SLICE_MBAFF )
{
i_mb_x += i_mb_y & 1;
i_mb_y ^= i_mb_x < h->mb.i_mb_width;
@@ -2179,6 +2317,7 @@
memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) );
dst->param = src->param;
dst->stat = src->stat;
+ dst->pixf = src->pixf;
}
static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
@@ -2202,15 +2341,28 @@
/* init stats */
memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
h->mb.b_reencode_mb = 0;
- while( h->sh.i_first_mb <= last_thread_mb )
+ while( h->sh.i_first_mb + SLICE_MBAFF*h->mb.i_mb_stride <= last_thread_mb )
{
h->sh.i_last_mb = last_thread_mb;
if( h->param.i_slice_max_mbs )
- h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1;
+ {
+ if( SLICE_MBAFF )
+ {
+ // convert first to mbaff form, add slice-max-mbs, then convert back to normal form
+ int last_mbaff = 2*(h->sh.i_first_mb % h->mb.i_mb_width)
+ + h->mb.i_mb_width*(h->sh.i_first_mb / h->mb.i_mb_width)
+ + h->param.i_slice_max_mbs - 1;
+ int last_x = (last_mbaff % (2*h->mb.i_mb_width))/2;
+ int last_y = (last_mbaff / (2*h->mb.i_mb_width))*2 + 1;
+ h->sh.i_last_mb = last_x + h->mb.i_mb_stride*last_y;
+ }
+ else
+ h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1;
+ }
else if( h->param.i_slice_count && !h->param.b_sliced_threads )
{
- int height = h->mb.i_mb_height >> h->param.b_interlaced;
- int width = h->mb.i_mb_width << h->param.b_interlaced;
+ int height = h->mb.i_mb_height >> PARAM_INTERLACED;
+ int width = h->mb.i_mb_width << PARAM_INTERLACED;
i_slice_num++;
h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1;
}
@@ -2218,6 +2370,9 @@
if( x264_stack_align( x264_slice_write, h ) )
return (void *)-1;
h->sh.i_first_mb = h->sh.i_last_mb + 1;
+ // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
+ if( SLICE_MBAFF && h->sh.i_first_mb % h->mb.i_mb_width )
+ h->sh.i_first_mb -= h->mb.i_mb_stride;
}
#if HAVE_VISUALIZE
@@ -2242,9 +2397,9 @@
t->param = h->param;
memcpy( &t->i_frame, &h->i_frame, offsetof(x264_t, rc) - offsetof(x264_t, i_frame) );
}
- int height = h->mb.i_mb_height >> h->param.b_interlaced;
- t->i_threadslice_start = ((height * i + h->param.i_slice_count/2) / h->param.i_threads) << h->param.b_interlaced;
- t->i_threadslice_end = ((height * (i+1) + h->param.i_slice_count/2) / h->param.i_threads) << h->param.b_interlaced;
+ int height = h->mb.i_mb_height >> PARAM_INTERLACED;
+ t->i_threadslice_start = ((height * i + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED;
+ t->i_threadslice_end = ((height * (i+1) + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED;
t->sh.i_first_mb = t->i_threadslice_start * h->mb.i_mb_width;
t->sh.i_last_mb = t->i_threadslice_end * h->mb.i_mb_width - 1;
}
@@ -2270,7 +2425,7 @@
for( int i = 1; i < h->param.i_threads; i++ )
{
x264_fdec_filter_row( h->thread[i], h->thread[i]->i_threadslice_start + 1, 0 );
- if( h->sh.b_mbaff )
+ if( SLICE_MBAFF )
x264_fdec_filter_row( h->thread[i], h->thread[i]->i_threadslice_start + 2, 0 );
}
@@ -2410,7 +2565,11 @@
if( fenc->i_pic_struct == PIC_STRUCT_AUTO )
{
+#if HAVE_INTERLACED
int b_interlaced = fenc->param ? fenc->param->b_interlaced : h->param.b_interlaced;
+#else
+ int b_interlaced = 0;
+#endif
if( b_interlaced )
{
int b_tff = fenc->param ? fenc->param->b_tff : h->param.b_tff;
@@ -2520,7 +2679,7 @@
i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
h->sh.i_type = SLICE_TYPE_I;
x264_reference_hierarchy_reset( h );
- if( h->param.i_open_gop )
+ if( h->param.b_open_gop )
h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
}
else if( h->fenc->i_type == X264_TYPE_P )
@@ -2695,7 +2854,7 @@
if( h->fenc->i_type != X264_TYPE_IDR )
{
- int time_to_recovery = h->param.i_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe - 1;
+ int time_to_recovery = h->param.b_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe - 1;
x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
if( x264_nal_end( h ) )
@@ -2723,6 +2882,17 @@
overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
}
+ /* As required by Blu-ray. */
+ if( !IS_X264_TYPE_B( h->fenc->i_type ) && h->b_sh_backup )
+ {
+ h->b_sh_backup = 0;
+ x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+ x264_sei_dec_ref_pic_marking_write( h, &h->out.bs );
+ if( x264_nal_end( h ) )
+ return -1;
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+ }
+
if( h->fenc->b_keyframe && h->param.b_intra_refresh )
h->i_cpb_delay_pir_offset = h->fenc->i_cpb_delay;
@@ -2814,6 +2984,8 @@
}
int frame_size = x264_encoder_encapsulate_nals( h, 0 );
+ if( frame_size < 0 )
+ return -1;
/* Set output picture properties */
pic_out->i_type = h->fenc->i_type;
@@ -2867,6 +3039,8 @@
if( x264_nal_end( h ) )
return -1;
int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
+ if( total_size < 0 )
+ return -1;
frame_size += total_size;
filler -= total_size;
}
@@ -2902,6 +3076,8 @@
for( int i_list = 0; i_list < 2; i_list++ )
for( int i = 0; i < X264_REF_MAX*2; i++ )
h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i];
+ for( int i = 0; i < 3; i++ )
+ h->stat.i_mb_field[i] += h->stat.frame.i_mb_field[i];
if( h->sh.i_type == SLICE_TYPE_P && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
{
h->stat.i_wpred[0] += !!h->sh.weight[0][0].weightfn;
@@ -3171,15 +3347,30 @@
int64_t i_intra = i_i8x8 + SUM3b( h->stat.i_mb_count, I_4x4 )
+ SUM3b( h->stat.i_mb_count, I_16x16 );
int64_t i_all_intra = i_intra + SUM3b( h->stat.i_mb_count, I_PCM);
+ int64_t i_skip = SUM3b( h->stat.i_mb_count, P_SKIP )
+ + SUM3b( h->stat.i_mb_count, B_SKIP );
const int i_count = h->stat.i_frame_count[SLICE_TYPE_I] +
h->stat.i_frame_count[SLICE_TYPE_P] +
h->stat.i_frame_count[SLICE_TYPE_B];
+ int64_t i_mb_count = (int64_t)i_count * h->mb.i_mb_count;
+ int64_t i_inter = i_mb_count - i_skip - i_intra;
const double duration = h->stat.f_frame_duration[SLICE_TYPE_I] +
h->stat.f_frame_duration[SLICE_TYPE_P] +
h->stat.f_frame_duration[SLICE_TYPE_B];
- int64_t i_mb_count = (int64_t)i_count * h->mb.i_mb_count;
float f_bitrate = SUM3(h->stat.i_frame_size) / duration / 125;
+ if( PARAM_INTERLACED )
+ {
+ char *fieldstats = buf;
+ fieldstats[0] = 0;
+ if( i_inter )
+ fieldstats += sprintf( fieldstats, " inter:%.1f%%", h->stat.i_mb_field[1] * 100.0 / i_inter );
+ if( i_skip )
+ fieldstats += sprintf( fieldstats, " skip:%.1f%%", h->stat.i_mb_field[2] * 100.0 / i_skip );
+ x264_log( h, X264_LOG_INFO, "field mbs: intra: %.1f%%%s\n",
+ h->stat.i_mb_field[0] * 100.0 / i_intra, buf );
+ }
+
if( h->pps->b_transform_8x8_mode )
{
buf[0] = 0;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/macroblock.c
^
|
@@ -273,59 +273,19 @@
h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
}
-static inline int idct_dequant_round_2x2_dc( dctcoef ref[4], dctcoef dct[4], int dequant_mf[6][16], int i_qp )
-{
- dctcoef out[4];
- idct_dequant_2x2_dconly( out, dct, dequant_mf, i_qp );
- return ((ref[0] ^ (out[0]+32))
- | (ref[1] ^ (out[1]+32))
- | (ref[2] ^ (out[2]+32))
- | (ref[3] ^ (out[3]+32))) >> 6;
-}
-
/* Round down coefficients losslessly in DC-only chroma blocks.
* Unlike luma blocks, this can't be done with a lookup table or
* other shortcut technique because of the interdependencies
* between the coefficients due to the chroma DC transform. */
-static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp, dctcoef dct2x2[4] )
+static ALWAYS_INLINE int x264_mb_optimize_chroma_dc( x264_t *h, dctcoef dct2x2[4], int dequant_mf[6][16], int i_qp )
{
- dctcoef dct2x2_orig[4];
- int coeff, nz;
+ int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
/* If the QP is too high, there's no benefit to rounding optimization. */
- if( h->dequant4_mf[CQM_4IC + b_inter][i_qp%6][0] << (i_qp/6) > 32*64 )
+ if( dmf > 32*64 )
return 1;
- idct_dequant_2x2_dconly( dct2x2_orig, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
- dct2x2_orig[0] += 32;
- dct2x2_orig[1] += 32;
- dct2x2_orig[2] += 32;
- dct2x2_orig[3] += 32;
-
- /* If the DC coefficients already round to zero, terminate early. */
- if( !((dct2x2_orig[0]|dct2x2_orig[1]|dct2x2_orig[2]|dct2x2_orig[3])>>6) )
- return 0;
-
- /* Start with the highest frequency coefficient... is this the best option? */
- for( nz = 0, coeff = h->quantf.coeff_last[DCT_CHROMA_DC]( dct2x2 ); coeff >= 0; coeff-- )
- {
- int level = dct2x2[coeff];
- int sign = level>>31 | 1; /* dct2x2[coeff] < 0 ? -1 : 1 */
-
- while( level )
- {
- dct2x2[coeff] = level - sign;
- if( idct_dequant_round_2x2_dc( dct2x2_orig, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ) )
- {
- nz = 1;
- dct2x2[coeff] = level;
- break;
- }
- level -= sign;
- }
- }
-
- return nz;
+ return h->quantf.optimize_chroma_dc( dct2x2, dmf );
}
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
@@ -370,7 +330,7 @@
if( nz_dc )
{
- if( !x264_mb_optimize_chroma_dc( h, b_inter, i_qp, dct2x2 ) )
+ if( !x264_mb_optimize_chroma_dc( h, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ) )
continue;
h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 1;
zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
@@ -446,7 +406,7 @@
h->mb.cache.non_zero_count[x264_scan8[16+3]+24*ch] = 0;
if( !nz_dc ) /* Whole block is empty */
continue;
- if( !x264_mb_optimize_chroma_dc( h, b_inter, i_qp, dct2x2 ) )
+ if( !x264_mb_optimize_chroma_dc( h, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ) )
{
h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 0;
continue;
@@ -553,7 +513,7 @@
void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int idx, int i_mode )
{
- int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
+ int stride = h->fenc->i_stride[0] << MB_INTERLACED;
pixel *p_src = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;
if( i_mode == I_PRED_4x4_V )
@@ -566,7 +526,7 @@
void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int idx, int i_mode, pixel edge[33] )
{
- int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
+ int stride = h->fenc->i_stride[0] << MB_INTERLACED;
pixel *p_src = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;
if( i_mode == I_PRED_8x8_V )
@@ -579,7 +539,7 @@
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
- int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
+ int stride = h->fenc->i_stride[0] << MB_INTERLACED;
if( i_mode == I_PRED_16x16_V )
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 );
else if( i_mode == I_PRED_16x16_H )
@@ -609,13 +569,8 @@
return;
}
- if( h->sh.b_mbaff
- && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
- && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
+ if( !h->mb.b_allow_skip )
{
- /* The first skip is predicted to be a frame mb pair.
- * We don't yet support the aff part of mbaff, so force it to non-skip
- * so that we can pick the aff flag. */
b_force_no_skip = 1;
if( IS_SKIP(h->mb.i_type) )
{
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/me.c
^
|
@@ -814,7 +814,7 @@
const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
const int i_pixel = m->i_pixel;
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
- const int mvy_offset = h->mb.b_interlaced & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ const int mvy_offset = MB_INTERLACED & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
ALIGNED_ARRAY_16( pixel, pix,[64*18] ); // really 17x17x2, but round up for alignment
@@ -951,8 +951,8 @@
pixel *pixv = &h->mb.pic.p_fdec[2][4*x + 4*y*FDEC_STRIDE];
int ref0 = h->mb.cache.ref[0][s8];
int ref1 = h->mb.cache.ref[1][s8];
- const int mv0y_offset = h->mb.b_interlaced & ref0 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
- const int mv1y_offset = h->mb.b_interlaced & ref1 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ const int mv0y_offset = MB_INTERLACED & ref0 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ const int mv1y_offset = MB_INTERLACED & ref1 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
int stride[2][9];
int bm0x = m0->mv[0];
int bm0y = m0->mv[1];
@@ -965,7 +965,7 @@
/* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
ALIGNED_ARRAY_16( uint8_t, visited,[8],[8][8] );
/* all permutations of an offset in up to 2 of the dimensions */
- static const int8_t dia4d[33][4] =
+ ALIGNED_4( static const int8_t dia4d[33][4] ) =
{
{0,0,0,0},
{0,0,0,1}, {0,0,0,-1}, {0,0,1,0}, {0,0,-1,0},
@@ -1129,14 +1129,13 @@
const int bw = x264_pixel_size[m->i_pixel].w;
const int bh = x264_pixel_size[m->i_pixel].h;
const int i_pixel = m->i_pixel;
- const int mvy_offset = h->mb.b_interlaced & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ const int mvy_offset = MB_INTERLACED & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
uint64_t bcost = COST_MAX64;
int bmx = m->mv[0];
int bmy = m->mv[1];
int omx, omy, pmx, pmy;
- unsigned bsatd;
- int satd;
+ int satd, bsatd;
int dir = -2;
int i8 = i4>>2;
uint16_t amvd;
@@ -1227,7 +1226,7 @@
m->mv[0] = bmx;
m->mv[1] = bmy;
x264_macroblock_cache_mv ( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, pack16to32_mask(bmx, bmy) );
- amvd = pack8to16( X264_MIN(abs(bmx - m->mvp[0]),33), X264_MIN(abs(bmy - m->mvp[1]),33) );
+ amvd = pack8to16( X264_MIN(abs(bmx - m->mvp[0]),66), X264_MIN(abs(bmy - m->mvp[1]),66) );
x264_macroblock_cache_mvd( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, amvd );
h->mb.b_skip_mc = 0;
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/me.h
^
|
@@ -55,7 +55,8 @@
ALIGNED_4( int16_t mv[2] );
} ALIGNED_16( x264_me_t );
-typedef struct {
+typedef struct
+{
int sad;
int16_t mv[2];
} mvsad_t;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/ratecontrol.c
^
|
@@ -29,7 +29,6 @@
#define _ISOC99_SOURCE
#undef NDEBUG // always check asserts, the speed effect is far too small to disable them
-#include <math.h>
#include "common/common.h"
#include "ratecontrol.h"
@@ -63,10 +62,10 @@
typedef struct
{
- double coeff;
- double count;
- double decay;
- double offset;
+ float coeff;
+ float count;
+ float decay;
+ float offset;
} predictor_t;
struct x264_ratecontrol_t
@@ -88,7 +87,7 @@
int qp; /* qp for current frame */
float qpm; /* qp for current macroblock: precise float for AQ */
float qpa_rc; /* average of macroblocks' qp before aq */
- float qpa_aq; /* average of macroblocks' qp after aq */
+ int qpa_aq; /* average of macroblocks' qp after aq */
float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */
/* VBV stuff */
@@ -168,8 +167,8 @@
static float rate_estimate_qscale( x264_t *h );
static int update_vbv( x264_t *h, int bits );
static void update_vbv_plan( x264_t *h, int overhead );
-static double predict_size( predictor_t *p, double q, double var );
-static void update_predictor( predictor_t *p, double q, double var, double bits );
+static float predict_size( predictor_t *p, float q, float var );
+static void update_predictor( predictor_t *p, float q, float var, float bits );
#define CMP_OPT_FIRST_PASS( opt, param_val )\
{\
@@ -184,13 +183,13 @@
* qp = h.264's quantizer
* qscale = linearized quantizer = Lagrange multiplier
*/
-static inline double qp2qscale( double qp )
+static inline float qp2qscale( float qp )
{
- return 0.85 * pow( 2.0, ( qp - 12.0 ) / 6.0 );
+ return 0.85f * powf( 2.0f, ( qp - 12.0f ) / 6.0f );
}
-static inline double qscale2qp( double qscale )
+static inline float qscale2qp( float qscale )
{
- return 12.0 + 6.0 * log2( qscale/0.85 );
+ return 12.0f + 6.0f * log2f( qscale/0.85f );
}
/* Texture bitrate is not quite inversely proportional to qscale,
@@ -206,32 +205,35 @@
+ rce->misc_bits;
}
-static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i )
+static ALWAYS_INLINE uint32_t ac_energy_var( uint64_t sum_ssd, int shift, x264_frame_t *frame, int i, int b_store )
{
uint32_t sum = sum_ssd;
uint32_t ssd = sum_ssd >> 32;
- frame->i_pixel_sum[i] += sum;
- frame->i_pixel_ssd[i] += ssd;
+ if( b_store )
+ {
+ frame->i_pixel_sum[i] += sum;
+ frame->i_pixel_ssd[i] += ssd;
+ }
return ssd - ((uint64_t)sum * sum >> shift);
}
-static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i )
+static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int field, int b_store )
{
int w = i ? 8 : 16;
int stride = frame->i_stride[i];
- int offset = h->mb.b_interlaced
+ int offset = field
? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride
: 16 * mb_x + w * mb_y * stride;
- stride <<= h->mb.b_interlaced;
+ stride <<= field;
if( i )
{
ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride );
- return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1 )
- + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2 );
+ return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1, b_store )
+ + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2, b_store );
}
else
- return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0 );
+ return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0, b_store );
}
// Find the total AC energy of the block in all planes.
@@ -241,8 +243,23 @@
* and putting it after floating point ops. As a result, we put the emms at the end of the
* function and make sure that its always called before the float math. Noinline makes
* sure no reordering goes on. */
- uint32_t var = ac_energy_plane( h, mb_x, mb_y, frame, 0 );
- var += ac_energy_plane( h, mb_x, mb_y, frame, 1 );
+ uint32_t var;
+ if( h->mb.b_adaptive_mbaff )
+ {
+ /* We don't know the super-MB mode we're going to pick yet, so
+ * simply try both and pick the lower of the two. */
+ uint32_t var_interlaced, var_progressive;
+ var_interlaced = ac_energy_plane( h, mb_x, mb_y, frame, 0, 1, 1 );
+ var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1 );
+ var_progressive = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0 );
+ var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0 );
+ var = X264_MIN( var_interlaced, var_progressive );
+ }
+ else
+ {
+ var = ac_energy_plane( h, mb_x, mb_y, frame, 0, PARAM_INTERLACED, 1 );
+ var += ac_energy_plane( h, mb_x, mb_y, frame, 1, PARAM_INTERLACED, 1 );
+ }
x264_emms();
return var;
}
@@ -460,6 +477,11 @@
if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
{
+ /* We don't support changing the ABR bitrate right now,
+ so if the stream starts as CBR, keep it CBR. */
+ if( rc->b_vbv_min_rate )
+ h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+
if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
{
h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
@@ -467,17 +489,10 @@
h->param.rc.i_vbv_buffer_size );
}
- /* We don't support changing the ABR bitrate right now,
- so if the stream starts as CBR, keep it CBR. */
- if( rc->b_vbv_min_rate )
- h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
-
int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
/* Init HRD */
- h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
- h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
if( h->param.i_nal_hrd && b_init )
{
h->sps->vui.hrd.i_cpb_cnt = 1;
@@ -522,7 +537,11 @@
x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" );
return;
}
+ h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
+ h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
+ if( rc->b_vbv_min_rate )
+ rc->bitrate = h->param.rc.i_bitrate * 1000.;
rc->buffer_rate = vbv_max_bitrate / rc->fps;
rc->vbv_max_rate = vbv_max_bitrate;
rc->buffer_size = vbv_buffer_size;
@@ -736,7 +755,8 @@
CMP_OPT_FIRST_PASS( "bframes", h->param.i_bframe );
CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
- CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop );
+ CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
+ CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat );
if( (p = strstr( opts, "keyint=" )) )
{
@@ -1199,6 +1219,8 @@
if( rc->b_vbv )
{
memset( h->fdec->i_row_bits, 0, h->mb.i_mb_height * sizeof(int) );
+ memset( h->fdec->f_row_qp, 0, h->mb.i_mb_height * sizeof(float) );
+ memset( h->fdec->f_row_qscale, 0, h->mb.i_mb_height * sizeof(float) );
rc->row_pred = &rc->row_preds[h->sh.i_type];
rc->buffer_rate = h->fenc->i_cpb_duration * rc->vbv_max_rate * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
update_vbv_plan( h, overhead );
@@ -1209,8 +1231,7 @@
int mincr = l->mincr;
- /* Blu-ray requires this */
- if( l->level_idc == 41 && h->param.i_nal_hrd )
+ if( h->param.b_bluray_compat )
mincr = 4;
/* High 10 doesn't require minCR, so just set the maximum to a large value. */
@@ -1237,11 +1258,7 @@
if( h->sh.i_type != SLICE_TYPE_B )
rc->bframes = h->fenc->i_bframes;
- if( i_force_qp != X264_QP_AUTO )
- {
- q = i_force_qp - 1;
- }
- else if( rc->b_abr )
+ if( rc->b_abr )
{
q = qscale2qp( rate_estimate_qscale( h ) );
}
@@ -1265,12 +1282,14 @@
q -= 6*log2f( zone->f_bitrate_factor );
}
}
+ if( i_force_qp != X264_QP_AUTO )
+ q = i_force_qp - 1;
q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
rc->qpa_rc =
rc->qpa_aq = 0;
- rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
+ rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
h->fdec->f_qp_avg_rc =
h->fdec->f_qp_avg_aq =
rc->qpm = q;
@@ -1283,72 +1302,84 @@
rc->last_non_b_pict_type = h->sh.i_type;
}
-static double predict_row_size( x264_t *h, int y, double qp )
+static float predict_row_size( x264_t *h, int y, float qscale )
{
/* average between two predictors:
* absolute SATD, and scaled bit cost of the colocated row in the previous frame */
x264_ratecontrol_t *rc = h->rc;
- double pred_s = predict_size( rc->row_pred[0], qp2qscale( qp ), h->fdec->i_row_satd[y] );
- double pred_t = 0;
- if( h->sh.i_type == SLICE_TYPE_I || qp >= h->fref[0][0]->f_row_qp[y] )
+ float pred_s = predict_size( rc->row_pred[0], qscale, h->fdec->i_row_satd[y] );
+ if( h->sh.i_type == SLICE_TYPE_I || qscale >= h->fref[0][0]->f_row_qscale[y] )
{
if( h->sh.i_type == SLICE_TYPE_P
&& h->fref[0][0]->i_type == h->fdec->i_type
+ && h->fref[0][0]->f_row_qscale[y] > 0
&& h->fref[0][0]->i_row_satd[y] > 0
&& (abs(h->fref[0][0]->i_row_satd[y] - h->fdec->i_row_satd[y]) < h->fdec->i_row_satd[y]/2))
{
- pred_t = h->fref[0][0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref[0][0]->i_row_satd[y]
- * qp2qscale( h->fref[0][0]->f_row_qp[y] ) / qp2qscale( qp );
+ float pred_t = h->fref[0][0]->i_row_bits[y] * h->fdec->i_row_satd[y] / h->fref[0][0]->i_row_satd[y]
+ * h->fref[0][0]->f_row_qscale[y] / qscale;
+ return (pred_s + pred_t) * 0.5f;
}
- if( pred_t == 0 )
- pred_t = pred_s;
- return (pred_s + pred_t) / 2;
+ return pred_s;
}
/* Our QP is lower than the reference! */
else
{
- double pred_intra = predict_size( rc->row_pred[1], qp2qscale( qp ), h->fdec->i_row_satds[0][0][y] );
+ float pred_intra = predict_size( rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y] );
/* Sum: better to overestimate than underestimate by using only one of the two predictors. */
return pred_intra + pred_s;
}
}
-static double row_bits_so_far( x264_t *h, int y )
+static int row_bits_so_far( x264_t *h, int y )
{
- double bits = 0;
+ int bits = 0;
for( int i = h->i_threadslice_start; i <= y; i++ )
bits += h->fdec->i_row_bits[i];
return bits;
}
-static double predict_row_size_sum( x264_t *h, int y, double qp )
+static float predict_row_size_sum( x264_t *h, int y, float qp )
{
- double bits = row_bits_so_far(h, y);
+ float qscale = qp2qscale( qp );
+ float bits = row_bits_so_far( h, y );
for( int i = y+1; i < h->i_threadslice_end; i++ )
- bits += predict_row_size( h, i, qp );
+ bits += predict_row_size( h, i, qscale );
return bits;
}
-
+/* TODO:
+ * eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
+ * make this function stop being needlessly O(N^2)
+ * update more often than once per row? */
void x264_ratecontrol_mb( x264_t *h, int bits )
{
x264_ratecontrol_t *rc = h->rc;
const int y = h->mb.i_mb_y;
- x264_emms();
-
h->fdec->i_row_bits[y] += bits;
- rc->qpa_rc += rc->qpm;
rc->qpa_aq += h->mb.i_qp;
- if( h->mb.i_mb_x != h->mb.i_mb_width - 1 || !rc->b_vbv )
+ if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
+ return;
+
+ x264_emms();
+ rc->qpa_rc += rc->qpm * h->mb.i_mb_width;
+
+ if( !rc->b_vbv )
return;
+ float qscale = qp2qscale( rc->qpm );
h->fdec->f_row_qp[y] = rc->qpm;
+ h->fdec->f_row_qscale[y] = qscale;
- update_predictor( rc->row_pred[0], qp2qscale( rc->qpm ), h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
+ update_predictor( rc->row_pred[0], qscale, h->fdec->i_row_satd[y], h->fdec->i_row_bits[y] );
if( h->sh.i_type == SLICE_TYPE_P && rc->qpm < h->fref[0][0]->f_row_qp[y] )
- update_predictor( rc->row_pred[1], qp2qscale( rc->qpm ), h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] );
+ update_predictor( rc->row_pred[1], qscale, h->fdec->i_row_satds[0][0][y], h->fdec->i_row_bits[y] );
+
+ /* update ratecontrol per-mbpair in MBAFF */
+ if( SLICE_MBAFF && !(y&1) )
+ return;
/* tweak quality based on difference from predicted size */
if( y < h->i_threadslice_end-1 )
@@ -1359,7 +1390,7 @@
if( rc->rate_factor_max_increment )
qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
- float step_size = 0.5;
+ float step_size = 0.5f;
/* B-frames shouldn't use lower QP than their reference frames. */
if( h->sh.i_type == SLICE_TYPE_B )
@@ -1370,7 +1401,7 @@
float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
- float max_frame_error = X264_MAX( 0.05, 1.0 / (h->mb.i_mb_height) );
+ float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
float size_of_other_slices = 0;
if( h->param.b_sliced_threads )
{
@@ -1387,22 +1418,22 @@
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
- int b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
+ float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
/* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
/* area at the top of the frame was measured inaccurately. */
- if( row_bits_so_far( h, y ) < 0.05 * slice_size_planned )
+ if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned )
return;
if( h->sh.i_type != SLICE_TYPE_I )
- rc_tol /= 2;
+ rc_tol *= 0.5f;
if( !rc->b_vbv_min_rate )
qp_min = X264_MAX( qp_min, rc->qp_novbv );
while( rc->qpm < qp_max
&& ((b1 > rc->frame_size_planned + rc_tol) ||
- (rc->buffer_fill - b1 < buffer_left_planned * 0.5) ||
+ (rc->buffer_fill - b1 < buffer_left_planned * 0.5f) ||
(b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) )
{
rc->qpm += step_size;
@@ -1411,8 +1442,8 @@
while( rc->qpm > qp_min
&& (rc->qpm > h->fdec->f_row_qp[0] || rc->single_frame_vbv)
- && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp)
- || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
+ && ((b1 < rc->frame_size_planned * 0.8f && rc->qpm <= prev_row_qp)
+ || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1f) )
{
rc->qpm -= step_size;
b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
@@ -1427,14 +1458,16 @@
b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
}
- h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
+ h->rc->frame_size_estimated = b1 - size_of_other_slices;
}
+ else
+ h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
}
int x264_ratecontrol_qp( x264_t *h )
{
x264_emms();
- return x264_clip3( h->rc->qpm + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+ return x264_clip3( h->rc->qpm + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
}
int x264_ratecontrol_mb_qp( x264_t *h )
@@ -1450,7 +1483,7 @@
qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
qp += qp_offset;
}
- return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
+ return x264_clip3( qp + 0.5f, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
}
/* In 2pass, force the same frame types as in the 1st pass */
@@ -1526,7 +1559,7 @@
h->stat.frame.i_mb_count_p += mbs[i];
h->fdec->f_qp_avg_rc = rc->qpa_rc /= h->mb.i_mb_count;
- h->fdec->f_qp_avg_aq = rc->qpa_aq /= h->mb.i_mb_count;
+ h->fdec->f_qp_avg_aq = (float)rc->qpa_aq / h->mb.i_mb_count;
if( h->param.rc.b_stat_write )
{
@@ -1558,7 +1591,7 @@
for( int i = 0; i < (use_old_stats ? rc->rce->refs : h->i_ref[0]); i++ )
{
int refcount = use_old_stats ? rc->rce->refcount[i]
- : h->param.b_interlaced ? h->stat.frame.i_mb_count_ref[0][i*2]
+ : PARAM_INTERLACED ? h->stat.frame.i_mb_count_ref[0][i*2]
+ h->stat.frame.i_mb_count_ref[0][i*2+1]
: h->stat.frame.i_mb_count_ref[0][i];
if( fprintf( rc->p_stat_file_out, "%d ", refcount ) < 0 )
@@ -1689,7 +1722,14 @@
{
x264_ratecontrol_t *rcc= h->rc;
x264_zone_t *zone = get_zone( h, frame_num );
- double q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
+ double q;
+ if( h->param.rc.b_mb_tree )
+ {
+ double timescale = (double)h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
+ q = pow( BASE_FRAME_DURATION / CLIP_DURATION(rce->i_duration * timescale), 1 - h->param.rc.f_qcompress );
+ }
+ else
+ q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
// avoid NaN's in the rc_eq
if( !isfinite(q) || rce->tex_bits + rce->mv_bits == 0 )
@@ -1712,10 +1752,11 @@
return q;
}
-static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
+static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q, int frame_num)
{
x264_ratecontrol_t *rcc = h->rc;
const int pict_type = rce->pict_type;
+ x264_zone_t *zone = get_zone( h, frame_num );
// force I/B quants as a function of P quants
const double last_p_q = rcc->last_qscale_for[SLICE_TYPE_P];
@@ -1776,23 +1817,32 @@
rcc->accum_p_qp = mask * (qscale2qp( q ) + rcc->accum_p_qp);
rcc->accum_p_norm = mask * (1 + rcc->accum_p_norm);
}
+
+ if( zone )
+ {
+ if( zone->b_force_qp )
+ q = qp2qscale( zone->i_qp );
+ else
+ q /= zone->f_bitrate_factor;
+ }
+
return q;
}
-static double predict_size( predictor_t *p, double q, double var )
+static float predict_size( predictor_t *p, float q, float var )
{
- return (p->coeff*var + p->offset) / (q*p->count);
+ return (p->coeff*var + p->offset) / (q*p->count);
}
-static void update_predictor( predictor_t *p, double q, double var, double bits )
+static void update_predictor( predictor_t *p, float q, float var, float bits )
{
- const double range = 1.5;
+ float range = 1.5;
if( var < 10 )
return;
- double old_coeff = p->coeff / p->count;
- double new_coeff = bits*q / var;
- double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
- double new_offset = bits*q - new_coeff_clipped * var;
+ float old_coeff = p->coeff / p->count;
+ float new_coeff = bits*q / var;
+ float new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
+ float new_offset = bits*q - new_coeff_clipped * var;
if( new_offset >= 0 )
new_coeff = new_coeff_clipped;
else
@@ -1829,7 +1879,8 @@
if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
{
- filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
+ int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
+ filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
}
@@ -1871,7 +1922,7 @@
double bits = t->rc->frame_size_planned;
if( !t->b_thread_active )
continue;
- bits = X264_MAX(bits, t->rc->frame_size_estimated);
+ bits = X264_MAX(bits, t->rc->frame_size_estimated);
rcc->buffer_fill -= bits;
rcc->buffer_fill = X264_MAX( rcc->buffer_fill, 0 );
rcc->buffer_fill += t->rc->buffer_rate;
@@ -2084,6 +2135,9 @@
rcc->frame_size_planned = qscale2bits( &rce, qp2qscale( q ) );
else
rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, qp2qscale( q ), h->fref[1][h->i_ref[1]-1]->i_satd );
+ /* Limit planned size by MinCR */
+ if( rcc->b_vbv )
+ rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
h->rc->frame_size_estimated = rcc->frame_size_planned;
/* For row SATDs */
@@ -2114,7 +2168,7 @@
double bits = t->rc->frame_size_planned;
if( !t->b_thread_active )
continue;
- bits = X264_MAX(bits, t->rc->frame_size_estimated);
+ bits = X264_MAX(bits, t->rc->frame_size_estimated);
predicted_bits += (int64_t)bits;
}
}
@@ -2199,6 +2253,7 @@
rce.s_count = 0;
rce.qscale = 1;
rce.pict_type = pict_type;
+ rce.i_duration = h->fenc->i_duration;
if( h->param.rc.i_rc_method == X264_RC_CRF )
{
@@ -2274,6 +2329,9 @@
/* Always use up the whole VBV in this case. */
if( rcc->single_frame_vbv )
rcc->frame_size_planned = rcc->buffer_rate;
+ /* Limit planned size by MinCR */
+ if( rcc->b_vbv )
+ rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
h->rc->frame_size_estimated = rcc->frame_size_planned;
return q;
}
@@ -2384,13 +2442,14 @@
COPY(prev_zone);
COPY(qpbuf_pos);
/* these vars can be updated by x264_ratecontrol_init_reconfigurable */
- COPY(buffer_rate);
+ COPY(bitrate);
COPY(buffer_size);
+ COPY(buffer_rate);
+ COPY(vbv_max_rate);
COPY(single_frame_vbv);
COPY(cbr_decay);
- COPY(b_vbv_min_rate);
COPY(rate_factor_constant);
- COPY(bitrate);
+ COPY(rate_factor_max_increment);
#undef COPY
}
if( cur != next )
@@ -2651,14 +2710,14 @@
/* find qscale */
for( int i = 0; i < rcc->num_entries; i++ )
{
- qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, i );
+ qscale[i] = get_qscale( h, &rcc->entry[i], rate_factor, -1 );
rcc->last_qscale_for[rcc->entry[i].pict_type] = qscale[i];
}
/* fixed I/B qscale relative to P */
for( int i = rcc->num_entries-1; i >= 0; i-- )
{
- qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i] );
+ qscale[i] = get_diff_limited_q( h, &rcc->entry[i], qscale[i], i );
assert(qscale[i] >= 0);
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/rdo.c
^
|
@@ -205,8 +205,8 @@
/* Really should be 15 bytes, but rounding up a byte saves some
* instructions and is faster, and copying extra data doesn't hurt. */
- COPY_CABAC_PART( significant_coeff_flag_offset[h->mb.b_interlaced][cat], 16 );
- COPY_CABAC_PART( last_coeff_flag_offset[h->mb.b_interlaced][cat], 16 );
+ COPY_CABAC_PART( significant_coeff_flag_offset[MB_INTERLACED][cat], 16 );
+ COPY_CABAC_PART( last_coeff_flag_offset[MB_INTERLACED][cat], 16 );
COPY_CABAC_PART( coeff_abs_level_m1_offset[cat], 10 );
cb->f8_bits_encoded = 0;
}
@@ -387,7 +387,8 @@
}
}
-typedef struct {
+typedef struct
+{
int64_t score;
int level_idx; // index into level_tree[]
uint8_t cabac_state[10]; //just the contexts relevant to coding abs_level_m1
@@ -425,7 +426,7 @@
trellis_node_t *nodes_cur = nodes[0];
trellis_node_t *nodes_prev = nodes[1];
trellis_node_t *bnode;
- const int b_interlaced = h->mb.b_interlaced;
+ const int b_interlaced = MB_INTERLACED;
uint8_t *cabac_state_sig = &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
uint8_t *cabac_state_last = &h->cabac.state[ last_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
const int f = 1 << 15; // no deadzone
@@ -435,7 +436,8 @@
// (# of coefs) * (# of ctx) * (# of levels tried) = 1024
// we don't need to keep all of those: (# of coefs) * (# of ctx) would be enough,
// but it takes more time to remove dead states than you gain in reduced memory.
- struct {
+ struct
+ {
uint16_t abs_level;
uint16_t next;
} level_tree[64*8*2];
@@ -839,12 +841,12 @@
if( h->param.b_cabac )
return quant_trellis_cabac( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
- NULL, ctx_block_cat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
+ NULL, ctx_block_cat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[MB_INTERLACED],
ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, ctx_block_cat==DCT_CHROMA_DC ? 4 : 16, 0 );
return quant_trellis_cavlc( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
- NULL, ctx_block_cat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
+ NULL, ctx_block_cat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[MB_INTERLACED],
ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, ctx_block_cat==DCT_CHROMA_DC ? 4 : 16, 0, 0 );
}
@@ -855,14 +857,14 @@
if( h->param.b_cabac )
return quant_trellis_cabac( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
- x264_dct4_weight2_zigzag[h->mb.b_interlaced],
- x264_zigzag_scan4[h->mb.b_interlaced],
+ x264_dct4_weight2_zigzag[MB_INTERLACED],
+ x264_zigzag_scan4[MB_INTERLACED],
ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
return quant_trellis_cavlc( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
- x264_dct4_weight2_zigzag[h->mb.b_interlaced],
- x264_zigzag_scan4[h->mb.b_interlaced],
+ x264_dct4_weight2_zigzag[MB_INTERLACED],
+ x264_zigzag_scan4[MB_INTERLACED],
ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx, 0 );
}
@@ -873,8 +875,8 @@
{
return quant_trellis_cabac( h, dct,
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
- x264_dct8_weight2_zigzag[h->mb.b_interlaced],
- x264_zigzag_scan8[h->mb.b_interlaced],
+ x264_dct8_weight2_zigzag[MB_INTERLACED],
+ x264_zigzag_scan8[MB_INTERLACED],
DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
}
@@ -884,8 +886,8 @@
{
int nz = quant_trellis_cavlc( h, dct,
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
- x264_dct8_weight2_zigzag[h->mb.b_interlaced],
- x264_zigzag_scan8[h->mb.b_interlaced],
+ x264_dct8_weight2_zigzag[MB_INTERLACED],
+ x264_zigzag_scan8[MB_INTERLACED],
DCT_LUMA_4x4, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 16, idx*4+i, 1 );
/* Set up nonzero count for future calls */
h->mb.cache.non_zero_count[x264_scan8[idx*4+i]] = nz;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/set.c
^
|
@@ -24,8 +24,6 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#include <math.h>
-
#include "common/common.h"
#include "set.h"
@@ -207,18 +205,18 @@
sps->vui.i_sar_height= param->vui.i_sar_height;
}
- sps->vui.b_overscan_info_present = ( param->vui.i_overscan ? 1 : 0 );
+ sps->vui.b_overscan_info_present = param->vui.i_overscan > 0 && param->vui.i_overscan <= 2;
if( sps->vui.b_overscan_info_present )
sps->vui.b_overscan_info = ( param->vui.i_overscan == 2 ? 1 : 0 );
sps->vui.b_signal_type_present = 0;
- sps->vui.i_vidformat = ( param->vui.i_vidformat <= 5 ? param->vui.i_vidformat : 5 );
+ sps->vui.i_vidformat = ( param->vui.i_vidformat >= 0 && param->vui.i_vidformat <= 5 ? param->vui.i_vidformat : 5 );
sps->vui.b_fullrange = ( param->vui.b_fullrange ? 1 : 0 );
sps->vui.b_color_description_present = 0;
- sps->vui.i_colorprim = ( param->vui.i_colorprim <= 9 ? param->vui.i_colorprim : 2 );
- sps->vui.i_transfer = ( param->vui.i_transfer <= 11 ? param->vui.i_transfer : 2 );
- sps->vui.i_colmatrix = ( param->vui.i_colmatrix <= 9 ? param->vui.i_colmatrix : 2 );
+ sps->vui.i_colorprim = ( param->vui.i_colorprim >= 0 && param->vui.i_colorprim <= 8 ? param->vui.i_colorprim : 2 );
+ sps->vui.i_transfer = ( param->vui.i_transfer >= 0 && param->vui.i_transfer <= 10 ? param->vui.i_transfer : 2 );
+ sps->vui.i_colmatrix = ( param->vui.i_colmatrix >= 0 && param->vui.i_colmatrix <= 8 ? param->vui.i_colmatrix : 2 );
if( sps->vui.i_colorprim != 2 ||
sps->vui.i_transfer != 2 ||
sps->vui.i_colmatrix != 2 )
@@ -234,7 +232,7 @@
}
/* FIXME: not sufficient for interlaced video */
- sps->vui.b_chroma_loc_info_present = ( param->vui.i_chroma_loc ? 1 : 0 );
+ sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
if( sps->vui.b_chroma_loc_info_present )
{
sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
@@ -553,7 +551,6 @@
bs_flush( &q );
x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_RECOVERY_POINT );
-
}
int x264_sei_version_write( x264_t *h, bs_t *s )
@@ -691,6 +688,38 @@
bs_flush( s );
}
+void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s )
+{
+ x264_slice_header_t *sh = &h->sh_backup;
+ bs_t q;
+ uint8_t tmp_buf[100];
+ bs_init( &q, tmp_buf, 100 );
+
+ bs_realign( &q );
+
+ /* We currently only use this for repeating B-refs, as required by Blu-ray. */
+ bs_write1( &q, 0 ); //original_idr_flag
+ bs_write_ue( &q, sh->i_frame_num ); //original_frame_num
+ if( !h->sps->b_frame_mbs_only )
+ bs_write1( &q, 0 ); //original_field_pic_flag
+
+ bs_write1( &q, sh->i_mmco_command_count > 0 );
+ if( sh->i_mmco_command_count > 0 )
+ {
+ for( int i = 0; i < sh->i_mmco_command_count; i++ )
+ {
+ bs_write_ue( &q, 1 );
+ bs_write_ue( &q, sh->mmco[i].i_difference_of_pic_nums - 1 );
+ }
+ bs_write_ue( &q, 0 );
+ }
+
+ bs_align_10( &q );
+ bs_flush( &q );
+
+ x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
+}
+
const x264_level_t x264_levels[] =
{
{ 10, 1485, 99, 152064, 64, 175, 64, 64, 0, 2, 0, 0, 1 },
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/set.h
^
|
@@ -36,6 +36,7 @@
int x264_validate_levels( x264_t *h, int verbose );
void x264_sei_buffering_period_write( x264_t *h, bs_t *s );
void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
+void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
void x264_filler_write( x264_t *h, bs_t *s, int filler );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/encoder/slicetype.c
^
|
@@ -25,8 +25,6 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#include <math.h>
-
#include "common/common.h"
#include "macroblock.h"
#include "me.h"
@@ -169,14 +167,18 @@
for( int x = 0; x < i_width; x += 8, i_mb++, pixoff += 8)
{
w->weightfn[8>>2]( buf, 8, &src[pixoff], i_stride, w, 8 );
- cost += X264_MIN( h->pixf.mbcmp[PIXEL_8x8]( buf, 8, &fenc_plane[pixoff], i_stride ), fenc->i_intra_cost[i_mb] );
+ int cmp = h->pixf.mbcmp[PIXEL_8x8]( buf, 8, &fenc_plane[pixoff], i_stride );
+ cost += X264_MIN( cmp, fenc->i_intra_cost[i_mb] );
}
cost += x264_weight_slice_header_cost( h, w, 0 );
}
else
for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
for( int x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 )
- cost += X264_MIN( h->pixf.mbcmp[PIXEL_8x8]( &src[pixoff], i_stride, &fenc_plane[pixoff], i_stride ), fenc->i_intra_cost[i_mb] );
+ {
+ int cmp = h->pixf.mbcmp[PIXEL_8x8]( &src[pixoff], i_stride, &fenc_plane[pixoff], i_stride );
+ cost += X264_MIN( cmp, fenc->i_intra_cost[i_mb] );
+ }
x264_emms();
return cost;
}
@@ -369,8 +371,8 @@
const int i_stride = fenc->i_stride_lowres;
const int i_pel_offset = 8 * (i_mb_x + i_mb_y * i_stride);
const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
- int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
- int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
+ int16_t (*fenc_mvs[2])[2] = { &fenc->lowres_mvs[0][b-p0-1][i_mb_xy], &fenc->lowres_mvs[1][p1-b-1][i_mb_xy] };
+ int (*fenc_costs[2]) = { &fenc->lowres_mv_costs[0][b-p0-1][i_mb_xy], &fenc->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->mb.i_mb_width - 1 &&
i_mb_y > 0 && i_mb_y < h->mb.i_mb_height - 1) ||
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;
@@ -578,15 +580,14 @@
i_icost += intra_penalty;
fenc->i_intra_cost[i_mb_xy] = i_icost;
+ int i_icost_aq = i_icost;
+ if( h->param.rc.i_aq_mode )
+ i_icost_aq = (i_icost_aq * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
+ fenc->i_row_satds[0][0][h->mb.i_mb_y] += i_icost_aq;
if( b_frame_score_mb )
{
- int *row_satd_intra = frames[b]->i_row_satds[0][0];
- int i_icost_aq = i_icost;
- if( h->param.rc.i_aq_mode )
- i_icost_aq = (i_icost_aq * frames[b]->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
fenc->i_cost_est[0][0] += i_icost;
fenc->i_cost_est_aq[0][0] += i_icost_aq;
- row_satd_intra[h->mb.i_mb_y] += i_icost_aq;
}
}
@@ -610,13 +611,13 @@
{
int i_bcost_aq = i_bcost;
if( h->param.rc.i_aq_mode )
- i_bcost_aq = (i_bcost_aq * frames[b]->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
+ i_bcost_aq = (i_bcost_aq * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
fenc->i_row_satds[b-p0][p1-b][h->mb.i_mb_y] += i_bcost_aq;
if( b_frame_score_mb )
{
/* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
- frames[b]->i_cost_est[b-p0][p1-b] += i_bcost;
- frames[b]->i_cost_est_aq[b-p0][p1-b] += i_bcost_aq;
+ fenc->i_cost_est[b-p0][p1-b] += i_bcost;
+ fenc->i_cost_est_aq[b-p0][p1-b] += i_bcost_aq;
}
}
@@ -750,8 +751,7 @@
static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
{
- int fps_factor_intra = round( CLIP_DURATION(frame->f_duration) / BASE_FRAME_DURATION * 256 );
- int fps_factor_propagate = round( CLIP_DURATION( average_duration) / BASE_FRAME_DURATION * 256 );
+ int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
float weightdelta = 0.0;
if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
@@ -762,11 +762,10 @@
for( int mb_index = 0; mb_index < h->mb.i_mb_count; mb_index++ )
{
int intra_cost = (frame->i_intra_cost[mb_index] * frame->i_inv_qscale_factor[mb_index] + 128) >> 8;
- int intra_cost_scaled = (intra_cost * fps_factor_intra + 128) >> 8;
if( intra_cost )
{
- int propagate_cost = (frame->i_propagate_cost[mb_index] * fps_factor_propagate + 128) >> 8;
- float log2_ratio = x264_log2(intra_cost_scaled + propagate_cost) - x264_log2(intra_cost) + weightdelta;
+ int propagate_cost = (frame->i_propagate_cost[mb_index] * fps_factor + 128) >> 8;
+ float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost) + weightdelta;
frame->f_qp_offset[mb_index] = frame->f_qp_offset_aq[mb_index] - strength * log2_ratio;
}
}
@@ -1103,9 +1102,9 @@
/* Uses strings due to the fact that the speed of the control functions is
negligible compared to the cost of running slicetype_frame_cost, and because
it makes debugging easier. */
-static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX] )
+static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX+1] )
{
- char paths[2][X264_LOOKAHEAD_MAX];
+ char paths[2][X264_LOOKAHEAD_MAX+1];
int num_paths = X264_MIN( h->param.i_bframe+1, length );
int best_cost = COST_MAX;
int idx = 0;
@@ -1256,6 +1255,8 @@
* more RD-optimal. */
if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
num_frames = framecnt;
+ else if( h->param.b_open_gop && num_frames < framecnt )
+ num_frames++;
else if( num_frames == 0 )
{
frames[1]->i_type = X264_TYPE_I;
@@ -1277,11 +1278,11 @@
{
if( num_frames > 1 )
{
- char best_paths[X264_BFRAME_MAX+1][X264_LOOKAHEAD_MAX] = {"","P"};
- int best_path_index = (num_frames-1) % (X264_BFRAME_MAX+1);
+ char best_paths[X264_BFRAME_MAX+1][X264_LOOKAHEAD_MAX+1] = {"","P"};
+ int best_path_index = num_frames % (X264_BFRAME_MAX+1);
/* Perform the frametype analysis. */
- for( int j = 2; j < num_frames; j++ )
+ for( int j = 2; j <= num_frames; j++ )
x264_slicetype_path( h, &a, frames, j, best_paths );
num_bframes = strspn( best_paths[best_path_index], "B" );
@@ -1375,7 +1376,7 @@
{
frames[i]->i_type = X264_TYPE_I;
reset_start = X264_MIN( reset_start, i+1 );
- if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
+ if( h->param.b_open_gop && h->param.b_bluray_compat )
while( IS_X264_TYPE_B( frames[i-1]->i_type ) )
i--;
}
@@ -1463,25 +1464,25 @@
}
if( frm->i_type == X264_TYPE_KEYFRAME )
- frm->i_type = h->param.i_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
+ frm->i_type = h->param.b_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
/* Limit GOP size */
if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
{
if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
- frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
+ frm->i_type = h->param.b_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
int warn = frm->i_type != X264_TYPE_IDR;
- if( warn && h->param.i_open_gop )
+ if( warn && h->param.b_open_gop )
warn &= frm->i_type != X264_TYPE_I;
if( warn )
x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
}
if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min )
{
- if( h->param.i_open_gop )
+ if( h->param.b_open_gop )
{
h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
- if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
+ if( h->param.b_bluray_compat )
h->lookahead->i_last_keyframe -= bframes; // Use bluray order
frm->b_keyframe = 1;
}
@@ -1655,7 +1656,7 @@
int ip_factor = 256 * h->param.rc.f_ip_factor; /* fix8 */
for( int y = 0; y < h->mb.i_mb_height; y++ )
{
- int mb_xy = y * h->mb.i_mb_stride;
+ int mb_xy = y * h->mb.i_mb_stride + h->fdec->i_pir_start_col;
for( int x = h->fdec->i_pir_start_col; x <= h->fdec->i_pir_end_col; x++, mb_xy++ )
{
int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor + 128) >> 8;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/filters/filters.c
^
|
@@ -27,7 +27,7 @@
#include "filters.h"
#define RETURN_IF_ERROR( cond, ... ) RETURN_IF_ERR( cond, "options", NULL, __VA_ARGS__ )
-char **x264_split_string( char *string, char *sep, uint32_t limit )
+char **x264_split_string( char *string, char *sep, int limit )
{
if( !string )
return NULL;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/filters/filters.h
^
|
@@ -30,7 +30,7 @@
#include "x264cli.h"
#include "filters/video/video.h"
-char **x264_split_string( char *string, char *sep, uint32_t limit );
+char **x264_split_string( char *string, char *sep, int limit );
void x264_free_string_array( char **array );
char **x264_split_options( const char *opt_str, const char *options[] );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/filters/video/resize.c
^
|
@@ -41,9 +41,8 @@
#if HAVE_SWSCALE
#undef DECLARE_ALIGNED
#include <libswscale/swscale.h>
-
-/* this function is not a part of the swscale API but is defined in swscale_internal.h */
-const char *sws_format_name( enum PixelFormat format );
+#include <libavutil/opt.h>
+#include <libavutil/pixdesc.h>
typedef struct
{
@@ -61,10 +60,11 @@
int buffer_allocated;
int dst_csp;
struct SwsContext *ctx;
- int ctx_flags;
+ uint32_t ctx_flags;
/* state of swapping chroma planes pre and post resize */
int pre_swap_chroma;
int post_swap_chroma;
+ int variable_input; /* input is capable of changing properties */
frame_prop_t dst; /* desired output properties */
frame_prop_t scale; /* properties of the SwsContext input */
} resizer_hnd_t;
@@ -98,16 +98,6 @@
" - area, bicublin, gauss, sinc, lanczos, spline\n" );
}
-static uint32_t convert_cpu_to_flag( uint32_t cpu )
-{
- uint32_t swscale_cpu = 0;
- if( cpu & X264_CPU_ALTIVEC )
- swscale_cpu |= SWS_CPU_CAPS_ALTIVEC;
- if( cpu & X264_CPU_MMXEXT )
- swscale_cpu |= SWS_CPU_CAPS_MMX | SWS_CPU_CAPS_MMX2;
- return swscale_cpu;
-}
-
static uint32_t convert_method_to_flag( const char *name )
{
uint32_t flag = 0;
@@ -348,6 +338,54 @@
return 0;
}
+static int x264_init_sws_context( resizer_hnd_t *h )
+{
+ if( !h->ctx )
+ {
+ h->ctx = sws_alloc_context();
+ if( !h->ctx )
+ return -1;
+
+ /* set flags that will not change */
+ av_set_int( h->ctx, "sws_flags", h->ctx_flags );
+ av_set_int( h->ctx, "dstw", h->dst.width );
+ av_set_int( h->ctx, "dsth", h->dst.height );
+ av_set_int( h->ctx, "dst_format", h->dst.pix_fmt );
+ av_set_int( h->ctx, "dst_range", 0 ); /* FIXME: use the correct full range value */
+ }
+
+ av_set_int( h->ctx, "srcw", h->scale.width );
+ av_set_int( h->ctx, "srch", h->scale.height );
+ av_set_int( h->ctx, "src_format", h->scale.pix_fmt );
+ av_set_int( h->ctx, "src_range", 0 ); /* FIXME: use the correct full range value */
+
+ /* FIXME: use the correct full range values
+ * FIXME: use the correct matrix coefficients (only YUV -> RGB conversions are supported) */
+ sws_setColorspaceDetails( h->ctx, sws_getCoefficients( SWS_CS_DEFAULT ), 0,
+ sws_getCoefficients( SWS_CS_DEFAULT ), 0, 0, 1<<16, 1<<16 );
+
+ return sws_init_context( h->ctx, NULL, NULL ) < 0;
+}
+
+static int check_resizer( resizer_hnd_t *h, cli_pic_t *in, int frame )
+{
+ frame_prop_t input_prop = { in->img.width, in->img.height, convert_csp_to_pix_fmt( in->img.csp ) };
+ if( !memcmp( &input_prop, &h->scale, sizeof(frame_prop_t) ) )
+ return 0;
+ /* also warn if the resizer was initialized after the first frame */
+ if( h->ctx || frame )
+ x264_cli_log( NAME, X264_LOG_WARNING, "stream properties changed at pts %"PRId64"\n", in->pts );
+ h->scale = input_prop;
+ if( !h->buffer_allocated )
+ {
+ if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
+ return -1;
+ h->buffer_allocated = 1;
+ }
+ FAIL_IF_ERROR( x264_init_sws_context( h ), "swscale init failed\n" )
+ return 0;
+}
+
static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
{
/* if called for normalizing the csp to known formats and the format is not unknown, exit */
@@ -372,6 +410,8 @@
h->dst.height = info->height;
if( !strcmp( opt_string, "normcsp" ) )
{
+ /* only in normalization scenarios is the input capable of changing properties */
+ h->variable_input = 1;
h->dst_csp = pick_closest_supported_csp( info->csp );
/* now fix the catch-all i420 choice if it does not allow for the current input resolution dimensions. */
if( h->dst_csp == X264_CSP_I420 && info->width&1 )
@@ -388,11 +428,10 @@
h->dst.width = param->i_width;
h->dst.height = param->i_height;
}
- uint32_t method = convert_method_to_flag( x264_otos( x264_get_option( optlist[5], opts ), "" ) );
+ h->ctx_flags = convert_method_to_flag( x264_otos( x264_get_option( optlist[5], opts ), "" ) );
x264_free_string_array( opts );
- h->ctx_flags = convert_cpu_to_flag( param->cpu ) | method;
- if( method != SWS_FAST_BILINEAR )
+ if( h->ctx_flags != SWS_FAST_BILINEAR )
h->ctx_flags |= SWS_FULL_CHR_H_INT | SWS_FULL_CHR_H_INP | SWS_ACCURATE_RND;
h->dst.pix_fmt = convert_csp_to_pix_fmt( h->dst_csp );
h->scale = h->dst;
@@ -408,13 +447,13 @@
/* confirm swscale can support this conversion */
FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
- "input colorspace %s with bit depth %d is not supported\n", sws_format_name( src_pix_fmt_inv ),
+ "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
- FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", sws_format_name( src_pix_fmt ) )
+ FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
- "input colorspace %s with bit depth %d is not supported\n", sws_format_name( dst_pix_fmt_inv ),
+ "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
- FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", sws_format_name( h->dst.pix_fmt ) )
+ FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
"swscale is not compatible with interlaced vertical resizing\n" )
/* confirm that the desired resolution meets the colorspace requirements */
@@ -426,8 +465,17 @@
x264_cli_log( NAME, X264_LOG_INFO, "resizing to %dx%d\n", h->dst.width, h->dst.height );
if( h->dst.pix_fmt != src_pix_fmt )
x264_cli_log( NAME, X264_LOG_WARNING, "converting from %s to %s\n",
- sws_format_name( src_pix_fmt ), sws_format_name( h->dst.pix_fmt ) );
+ av_get_pix_fmt_name( src_pix_fmt ), av_get_pix_fmt_name( h->dst.pix_fmt ) );
h->dst_csp |= info->csp & X264_CSP_VFLIP; // preserve vflip
+
+ /* if the input is not variable, initialize the context */
+ if( !h->variable_input )
+ {
+ cli_pic_t input_pic = {{info->csp, info->width, info->height, 0}, 0};
+ if( check_resizer( h, &input_pic, 0 ) )
+ return -1;
+ }
+
/* finished initing, overwrite values */
info->csp = h->dst_csp;
info->width = h->dst.width;
@@ -441,35 +489,12 @@
return 0;
}
-static int check_resizer( resizer_hnd_t *h, cli_pic_t *in )
-{
- frame_prop_t input_prop = { in->img.width, in->img.height, convert_csp_to_pix_fmt( in->img.csp ) };
- if( !memcmp( &input_prop, &h->scale, sizeof(frame_prop_t) ) )
- return 0;
- if( h->ctx )
- {
- sws_freeContext( h->ctx );
- x264_cli_log( NAME, X264_LOG_WARNING, "stream properties changed at pts %"PRId64"\n", in->pts );
- }
- h->scale = input_prop;
- if( !h->buffer_allocated )
- {
- if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
- return -1;
- h->buffer_allocated = 1;
- }
- h->ctx = sws_getContext( h->scale.width, h->scale.height, h->scale.pix_fmt, h->dst.width,
- h->dst.height, h->dst.pix_fmt, h->ctx_flags, NULL, NULL, NULL );
- FAIL_IF_ERROR( !h->ctx, "swscale init failed\n" )
- return 0;
-}
-
static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
{
resizer_hnd_t *h = handle;
if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
return -1;
- if( check_resizer( h, output ) )
+ if( h->variable_input && check_resizer( h, output, frame ) )
return -1;
if( h->pre_swap_chroma )
XCHG( uint8_t*, output->img.plane[1], output->img.plane[2] );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/input/ffms.c
^
|
@@ -85,8 +85,13 @@
}
if( !idx )
{
- idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
- fprintf( stderr, " \r" );
+ if( opt->progress )
+ {
+ idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
+ fprintf( stderr, " \r" );
+ }
+ else
+ idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
FAIL_IF_ERROR( !idx, "could not create index\n" )
if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/input/input.h
^
|
@@ -40,6 +40,7 @@
int bit_depth;
char *timebase;
int seek;
+ int progress;
} cli_input_opt_t;
/* properties of the source given by the demuxer */
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/input/lavf.c
^
|
@@ -145,6 +145,7 @@
return -1;
sscanf( opt->resolution, "%dx%d", ¶m->width, ¶m->height );
param->pix_fmt = opt->colorspace ? av_get_pix_fmt( opt->colorspace ) : PIX_FMT_YUV420P;
+ FAIL_IF_ERROR( param->pix_fmt == PIX_FMT_NONE, "unsupported colorspace: %s\n", opt->colorspace );
}
/* specify the input format. this is helpful when lavf fails to guess */
@@ -158,7 +159,7 @@
FAIL_IF_ERROR( av_find_stream_info( h->lavf ) < 0, "could not find input stream info\n" )
int i = 0;
- while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != CODEC_TYPE_VIDEO )
+ while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != AVMEDIA_TYPE_VIDEO )
i++;
FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" )
h->stream_id = i;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/input/timecode.c
^
|
@@ -25,7 +25,6 @@
#include "input.h"
#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ )
-#include <math.h>
typedef struct
{
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/input/y4m.c
^
|
@@ -201,7 +201,7 @@
static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
{
- int slen = strlen( Y4M_FRAME_MAGIC );
+ size_t slen = strlen( Y4M_FRAME_MAGIC );
int i = 0;
char header[16];
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/output/mp4.c
^
|
@@ -30,8 +30,10 @@
#if HAVE_GF_MALLOC
#undef malloc
#undef free
+#undef realloc
#define malloc gf_malloc
#define free gf_free
+#define realloc gf_realloc
#endif
typedef struct
@@ -49,6 +51,7 @@
int i_delay_frames;
int b_dts_compress;
int i_dts_compress_multiplier;
+ int i_data_size;
} mp4_hnd_t;
static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
@@ -233,10 +236,27 @@
gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 );
}
- p_mp4->p_sample->data = malloc( p_param->i_width * p_param->i_height * 3 / 2 );
+ p_mp4->i_data_size = p_param->i_width * p_param->i_height * 3 / 2;
+ p_mp4->p_sample->data = malloc( p_mp4->i_data_size );
if( !p_mp4->p_sample->data )
+ {
+ p_mp4->i_data_size = 0;
return -1;
+ }
+
+ return 0;
+}
+static int check_buffer( mp4_hnd_t *p_mp4, int needed_size )
+{
+ if( needed_size > p_mp4->i_data_size )
+ {
+ void *ptr = realloc( p_mp4->p_sample->data, needed_size );
+ if( !ptr )
+ return -1;
+ p_mp4->p_sample->data = ptr;
+ p_mp4->i_data_size = needed_size;
+ }
return 0;
}
@@ -284,6 +304,8 @@
// SEI
+ if( check_buffer( p_mp4, p_mp4->p_sample->dataLength + sei_size ) )
+ return -1;
memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, sei, sei_size );
p_mp4->p_sample->dataLength += sei_size;
@@ -296,6 +318,8 @@
int64_t dts;
int64_t cts;
+ if( check_buffer( p_mp4, p_mp4->p_sample->dataLength + i_size ) )
+ return -1;
memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, p_nalu, i_size );
p_mp4->p_sample->dataLength += i_size;
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/tools/checkasm.c
^
|
@@ -26,10 +26,6 @@
*****************************************************************************/
#include <ctype.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <math.h>
-
#include "common/common.h"
#include "common/cpu.h"
@@ -61,14 +57,16 @@
#define MAX_FUNCS 1000 // just has to be big enough to hold all the existing functions
#define MAX_CPUS 10 // number of different combinations of cpu flags
-typedef struct {
+typedef struct
+{
void *pointer; // just for detecting duplicates
uint32_t cpu;
uint32_t cycles;
uint32_t den;
} bench_t;
-typedef struct {
+typedef struct
+{
char *name;
bench_t vers[MAX_CPUS];
} bench_func_t;
@@ -90,7 +88,7 @@
static inline uint32_t read_time(void)
{
uint32_t a = 0;
-#if defined(__GNUC__) && (ARCH_X86 || ARCH_X86_64)
+#if HAVE_X86_INLINE_ASM
asm volatile( "rdtsc" :"=a"(a) ::"edx" );
#elif ARCH_PPC
asm volatile( "mftb %0" : "=r" (a) );
@@ -419,6 +417,26 @@
}
report( "pixel hadamard_ac :" );
+ ok = 1; used_asm = 0;
+ if( pixel_asm.vsad != pixel_ref.vsad )
+ {
+ for( int h = 2; h <= 32; h += 2 )
+ {
+ int res_c, res_asm;
+ set_func_name( "vsad" );
+ used_asm = 1;
+ res_c = call_c( pixel_c.vsad, pbuf1, 16, h );
+ res_asm = call_a( pixel_asm.vsad, pbuf1, 16, h );
+ if( res_c != res_asm )
+ {
+ ok = 0;
+ fprintf( stderr, "vsad: height=%d, %d != %d\n", h, res_c, res_asm );
+ break;
+ }
+ }
+ }
+ report( "pixel vsad :" );
+
#define TEST_INTRA_MBCMP( name, pred, satd, i8x8, ... ) \
if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
{ \
@@ -532,7 +550,7 @@
x264_dct_function_t dct_ref;
x264_dct_function_t dct_asm;
x264_quant_function_t qf;
- int ret = 0, ok, used_asm, interlace;
+ int ret = 0, ok, used_asm, interlace = 0;
ALIGNED_16( dctcoef dct1[16][16] );
ALIGNED_16( dctcoef dct2[16][16] );
ALIGNED_16( dctcoef dct4[16][16] );
@@ -697,21 +715,21 @@
TEST_DCTDC( idct4x4dc );
#undef TEST_DCTDC
- x264_zigzag_function_t zigzag_c;
- x264_zigzag_function_t zigzag_ref;
- x264_zigzag_function_t zigzag_asm;
+ x264_zigzag_function_t zigzag_c[2];
+ x264_zigzag_function_t zigzag_ref[2];
+ x264_zigzag_function_t zigzag_asm[2];
ALIGNED_16( dctcoef level1[64] );
ALIGNED_16( dctcoef level2[64] );
#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
- if( zigzag_asm.name != zigzag_ref.name ) \
+ if( zigzag_asm[interlace].name != zigzag_ref[interlace].name ) \
{ \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \
memcpy(dct, buf1, size*sizeof(dctcoef)); \
- call_c( zigzag_c.name, t1, dct ); \
- call_a( zigzag_asm.name, t2, dct ); \
+ call_c( zigzag_c[interlace].name, t1, dct ); \
+ call_a( zigzag_asm[interlace].name, t2, dct ); \
if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
{ \
ok = 0; \
@@ -720,26 +738,26 @@
}
#define TEST_ZIGZAG_SUB( name, t1, t2, size ) \
- if( zigzag_asm.name != zigzag_ref.name ) \
+ if( zigzag_asm[interlace].name != zigzag_ref[interlace].name ) \
{ \
int nz_a, nz_c; \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \
memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
- nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
- nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
+ nz_c = call_c1( zigzag_c[interlace].name, t1, pbuf2, pbuf3 ); \
+ nz_a = call_a1( zigzag_asm[interlace].name, t2, pbuf2, pbuf4 ); \
if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \
{ \
ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \
} \
- call_c2( zigzag_c.name, t1, pbuf2, pbuf3 ); \
- call_a2( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
+ call_c2( zigzag_c[interlace].name, t1, pbuf2, pbuf3 ); \
+ call_a2( zigzag_asm[interlace].name, t2, pbuf2, pbuf4 ); \
}
#define TEST_ZIGZAG_SUBAC( name, t1, t2 ) \
- if( zigzag_asm.name != zigzag_ref.name ) \
+ if( zigzag_asm[interlace].name != zigzag_ref[interlace].name ) \
{ \
int nz_a, nz_c; \
dctcoef dc_a, dc_c; \
@@ -754,8 +772,8 @@
memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
memcpy( pbuf4 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
} \
- nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
- nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
+ nz_c = call_c1( zigzag_c[interlace].name, t1, pbuf2, pbuf3, &dc_c ); \
+ nz_a = call_a1( zigzag_asm[interlace].name, t2, pbuf2, pbuf4, &dc_a ); \
if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
{ \
ok = 0; \
@@ -763,12 +781,12 @@
break; \
} \
} \
- call_c2( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
- call_a2( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
+ call_c2( zigzag_c[interlace].name, t1, pbuf2, pbuf3, &dc_c ); \
+ call_a2( zigzag_asm[interlace].name, t2, pbuf2, pbuf4, &dc_a ); \
}
#define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
- if( zigzag_asm.name != zigzag_ref.name ) \
+ if( zigzag_asm[interlace].name != zigzag_ref[interlace].name ) \
{ \
for( int j = 0; j < 100; j++ ) \
{ \
@@ -778,8 +796,8 @@
for( int i = 0; i < size; i++ ) \
dct[i] = rand()&0x1F ? 0 : dct[i]; \
memcpy(buf3, buf4, 10); \
- call_c( zigzag_c.name, t1, dct, buf3 ); \
- call_a( zigzag_asm.name, t2, dct, buf4 ); \
+ call_c( zigzag_c[interlace].name, t1, dct, buf3 ); \
+ call_a( zigzag_asm[interlace].name, t2, dct, buf4 ); \
if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( buf3, buf4, 10 ) ) \
{ \
ok = 0; \
@@ -787,33 +805,23 @@
} \
}
- interlace = 0;
- x264_zigzag_init( 0, &zigzag_c, 0 );
- x264_zigzag_init( cpu_ref, &zigzag_ref, 0 );
- x264_zigzag_init( cpu_new, &zigzag_asm, 0 );
-
- ok = 1; used_asm = 0;
- TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
- TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 );
- TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
- TEST_ZIGZAG_SUBAC( sub_4x4ac, level1, level2 );
- report( "zigzag_frame :" );
-
- interlace = 1;
- x264_zigzag_init( 0, &zigzag_c, 1 );
- x264_zigzag_init( cpu_ref, &zigzag_ref, 1 );
- x264_zigzag_init( cpu_new, &zigzag_asm, 1 );
-
- ok = 1; used_asm = 0;
- TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
- TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 );
- TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
- TEST_ZIGZAG_SUBAC( sub_4x4ac, level1, level2 );
- report( "zigzag_field :" );
+ x264_zigzag_init( 0, &zigzag_c[0], &zigzag_c[1] );
+ x264_zigzag_init( cpu_ref, &zigzag_ref[0], &zigzag_ref[1] );
+ x264_zigzag_init( cpu_new, &zigzag_asm[0], &zigzag_asm[1] );
ok = 1; used_asm = 0;
TEST_INTERLEAVE( interleave_8x8_cavlc, level1, level2, dct1[0], 64 );
report( "zigzag_interleave :" );
+
+ for( interlace = 0; interlace <= 1; interlace++ )
+ {
+ ok = 1; used_asm = 0;
+ TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
+ TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 );
+ TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
+ TEST_ZIGZAG_SUBAC( sub_4x4ac, level1, level2 );
+ report( interlace ? "zigzag_field :" : "zigzag_frame :" );
+ }
#undef TEST_ZIGZAG_SCAN
#undef TEST_ZIGZAG_SUB
@@ -1247,8 +1255,8 @@
int *dstc = dsta+400;
uint16_t *prop = (uint16_t*)buf1;
uint16_t *intra = (uint16_t*)buf4;
- uint16_t *inter = intra+100;
- uint16_t *qscale = inter+100;
+ uint16_t *inter = intra+128;
+ uint16_t *qscale = inter+128;
uint16_t *rnd = (uint16_t*)buf2;
x264_emms();
for( int j = 0; j < 100; j++ )
@@ -1268,6 +1276,44 @@
report( "mbtree propagate :" );
}
+ if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
+ {
+ set_func_name( "memcpy_aligned" );
+ ok = 1; used_asm = 1;
+ for( int size = 16; size < 256; size += 16 )
+ {
+ memset( buf4, 0xAA, size + 1 );
+ call_c( mc_c.memcpy_aligned, buf3, buf1, size );
+ call_a( mc_a.memcpy_aligned, buf4, buf1, size );
+ if( memcmp( buf3, buf4, size ) || buf4[size] != 0xAA )
+ {
+ ok = 0;
+ fprintf( stderr, "memcpy_aligned FAILED: size=%d\n", size );
+ break;
+ }
+ }
+ report( "memcpy aligned :" );
+ }
+
+ if( mc_a.memzero_aligned != mc_ref.memzero_aligned )
+ {
+ set_func_name( "memzero_aligned" );
+ ok = 1; used_asm = 1;
+ for( int size = 128; size < 1024; size += 128 )
+ {
+ memset( buf4, 0xAA, size + 1 );
+ call_c( mc_c.memzero_aligned, buf3, size );
+ call_a( mc_a.memzero_aligned, buf4, size );
+ if( memcmp( buf3, buf4, size ) || buf4[size] != 0xAA )
+ {
+ ok = 0;
+ fprintf( stderr, "memzero_aligned FAILED: size=%d\n", size );
+ break;
+ }
+ }
+ report( "memzero aligned :" );
+ }
+
return ret;
}
@@ -1280,9 +1326,9 @@
int alphas[36], betas[36];
int8_t tcs[36][4];
- x264_deblock_init( 0, &db_c );
- x264_deblock_init( cpu_ref, &db_ref );
- x264_deblock_init( cpu_new, &db_a );
+ x264_deblock_init( 0, &db_c, 0 );
+ x264_deblock_init( cpu_ref, &db_ref, 0 );
+ x264_deblock_init( cpu_new, &db_a, 0 );
/* not exactly the real values of a,b,tc but close enough */
for( int i = 35, a = 255, c = 250; i >= 0; i-- )
@@ -1337,7 +1383,8 @@
ALIGNED_ARRAY_16( uint8_t, nnz, [X264_SCAN8_SIZE] );
ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
ALIGNED_ARRAY_16( int16_t, mv, [2],[X264_SCAN8_LUMA_SIZE][2] );
- ALIGNED_ARRAY_16( uint8_t, bs, [2],[2][4][4] );
+ ALIGNED_ARRAY_16( uint8_t, bs, [2],[2][8][4] );
+ memset( bs, 99, sizeof(bs) );
for( int j = 0; j < X264_SCAN8_SIZE; j++ )
nnz[j] = ((rand()&7) == 7) * rand() & 0xf;
for( int j = 0; j < 2; j++ )
@@ -1348,8 +1395,8 @@
mv[j][k][l] = ((rand()&7) != 7) ? (rand()&7) - 3 : (rand()&1023) - 512;
}
set_func_name( "deblock_strength" );
- call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1) );
- call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1) );
+ call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1), NULL );
+ call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1), NULL );
if( memcmp( bs[0], bs[1], sizeof(bs[0]) ) )
{
ok = 0;
@@ -1384,7 +1431,7 @@
ALIGNED_16( dctcoef dct2[64] );
ALIGNED_16( uint8_t cqm_buf[64] );
int ret = 0, ok, used_asm;
- int oks[2] = {1,1}, used_asms[2] = {0,0};
+ int oks[3] = {1,1,1}, used_asms[3] = {0,0,0};
x264_t h_buf;
x264_t *h = &h_buf;
memset( h, 0, sizeof(*h) );
@@ -1558,6 +1605,41 @@
TEST_DEQUANT_DC( quant_4x4_dc, dequant_4x4_dc, CQM_4IY, 4 );
+#define TEST_OPTIMIZE_CHROMA_DC( qname, optname, w ) \
+ if( qf_a.optname != qf_ref.optname ) \
+ { \
+ set_func_name( #optname ); \
+ used_asms[2] = 1; \
+ for( int qp = h->param.rc.i_qp_max; qp >= h->param.rc.i_qp_min; qp-- ) \
+ { \
+ int dmf = h->dequant4_mf[CQM_4IC][qp%6][0] << qp/6; \
+ if( dmf > 32*64 ) \
+ continue; \
+ for( int i = 16; ; i <<= 1 )\
+ { \
+ int res_c, res_asm; \
+ int max = X264_MIN( i, PIXEL_MAX*16 ); \
+ for( int j = 0; j < w*w; j++ ) \
+ dct1[j] = rand()%(max*2+1) - max; \
+ call_c1( qf_c.qname, dct1, h->quant4_mf[CQM_4IC][qp][0]>>1, h->quant4_bias[CQM_4IC][qp][0]>>1 ); \
+ memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
+ res_c = call_c1( qf_c.optname, dct1, dmf ); \
+ res_asm = call_a1( qf_a.optname, dct2, dmf ); \
+ if( res_c != res_asm || memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
+ { \
+ oks[2] = 0; \
+ fprintf( stderr, #optname "(qp=%d, res_c=%d, res_asm=%d): [FAILED]\n", qp, res_c, res_asm ); \
+ } \
+ call_c2( qf_c.optname, dct1, dmf ); \
+ call_a2( qf_a.optname, dct2, dmf ); \
+ if( i >= PIXEL_MAX*16 ) \
+ break; \
+ } \
+ } \
+ }
+
+ TEST_OPTIMIZE_CHROMA_DC( quant_2x2_dc, optimize_chroma_dc, 2 );
+
x264_cqm_delete( h );
}
@@ -1567,6 +1649,9 @@
ok = oks[1]; used_asm = used_asms[1];
report( "dequant :" );
+ ok = oks[2]; used_asm = used_asms[2];
+ report( "optimize chroma dc :" );
+
ok = 1; used_asm = 0;
if( qf_a.denoise_dct != qf_ref.denoise_dct )
{
@@ -1858,6 +1943,7 @@
int ret = 0, ok, used_asm = 1;
if( cpu_ref || run_cabac_decision_c == run_cabac_decision_asm )
return 0;
+ x264_cabac_init();
set_func_name( "cabac_encode_decision" );
memcpy( buf4, buf3, 0x1000 );
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/tools/test_x264.py
^
|
@@ -35,6 +35,7 @@
("", "--intra-refresh"),
("", "--no-cabac"),
("", "--interlaced"),
+ ("", "--slice-max-size 1000"),
("", "--frame-packing 5"),
[ "--preset %s" % p for p in ("ultrafast",
"superfast",
@@ -260,6 +261,7 @@
ffmpeg_proc = Popen([
"ffmpeg",
+ "-vsync 0",
"-i",
"%s.264" % self.fixture.dispatcher.video,
"ffmpeg-output.yuv"
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/x264.c
^
|
@@ -27,13 +27,9 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#include <stdlib.h>
-#include <math.h>
-
#include <signal.h>
#define _GNU_SOURCE
#include <getopt.h>
-
#include "common/common.h"
#include "x264cli.h"
#include "input/input.h"
@@ -74,6 +70,8 @@
b_ctrl_c = 1;
}
+static char UNUSED originalCTitle[200] = "";
+
typedef struct {
int b_progress;
int i_seek;
@@ -124,7 +122,8 @@
static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
-typedef struct{
+typedef struct
+{
int mod;
uint8_t pattern[24];
float fps_factor;
@@ -227,10 +226,12 @@
printf( "(ffmpegsource %d.%d.%d.%d)\n", FFMS_VERSION >> 24, (FFMS_VERSION & 0xff0000) >> 16, (FFMS_VERSION & 0xff00) >> 8, FFMS_VERSION & 0xff );
#endif
printf( "built on " __DATE__ ", " );
-#ifdef __GNUC__
+#ifdef __INTEL_COMPILER
+ printf( "intel: %.2f (%d)\n", __INTEL_COMPILER / 100.f, __INTEL_COMPILER_BUILD_DATE );
+#elif defined(__GNUC__)
printf( "gcc: " __VERSION__ "\n" );
#else
- printf( "using a non-gcc compiler\n" );
+ printf( "using an unknown compiler\n" );
#endif
printf( "configuration: --bit-depth=%d\n", x264_bit_depth );
printf( "x264 license: " );
@@ -262,10 +263,15 @@
_setmode(_fileno(stdout), _O_BINARY);
#endif
+ GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
+
/* Parse command line */
if( parse( argc, argv, &param, &opt ) < 0 )
ret = -1;
+ /* Restore title; it can be changed by input modules */
+ SetConsoleTitle( originalCTitle );
+
/* Control-C handler */
signal( SIGINT, sigint_handler );
@@ -284,6 +290,8 @@
if( opt.qpfile )
fclose( opt.qpfile );
+ SetConsoleTitle( originalCTitle );
+
return ret;
}
@@ -323,11 +331,11 @@
printf( "\n" );
printf( " - valid csps for `lavf' demuxer:\n" );
printf( INDENT );
- int line_len = strlen( INDENT );
+ size_t line_len = strlen( INDENT );
for( enum PixelFormat i = PIX_FMT_NONE+1; i < PIX_FMT_NB; i++ )
{
const char *pfname = av_pix_fmt_descriptors[i].name;
- int name_len = strlen( pfname );
+ size_t name_len = strlen( pfname );
if( line_len + name_len > (80 - strlen( ", " )) )
{
printf( "\n" INDENT );
@@ -533,11 +541,7 @@
" - strict: Strictly hierarchical pyramid\n"
" - normal: Non-strict (not Blu-ray compatible)\n",
strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
- H1( " --open-gop <string> Use recovery points to close GOPs [none]\n"
- " - none: closed GOPs only\n"
- " - normal: standard open GOPs\n"
- " (not Blu-ray compatible)\n"
- " - bluray: Blu-ray-compatible open GOPs\n"
+ H1( " --open-gop Use recovery points to close GOPs\n"
" Only available with b-frames\n" );
H1( " --no-cabac Disable CABAC\n" );
H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
@@ -733,6 +737,7 @@
H0( " --seek <integer> First frame to encode\n" );
H0( " --frames <integer> Maximum number of frames to encode\n" );
H0( " --level <string> Specify level (as defined by Annex A)\n" );
+ H1( " --bluray-compat Enable compatibility hacks for Blu-ray support\n" );
H1( "\n" );
H1( " -v, --verbose Print stats for each frame\n" );
H1( " --no-progress Don't show the progress indicator while encoding\n" );
@@ -823,7 +828,8 @@
{ "no-b-adapt", no_argument, NULL, 0 },
{ "b-bias", required_argument, NULL, 0 },
{ "b-pyramid", required_argument, NULL, 0 },
- { "open-gop", required_argument, NULL, 0 },
+ { "open-gop", no_argument, NULL, 0 },
+ { "bluray-compat", no_argument, NULL, 0 },
{ "min-keyint", required_argument, NULL, 'i' },
{ "keyint", required_argument, NULL, 'I' },
{ "intra-refresh", no_argument, NULL, 0 },
@@ -1393,6 +1399,8 @@
info.tff = param->b_tff;
info.vfr = param->b_vfr_input;
+ input_opt.progress = opt->b_progress;
+
if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) )
return -1;
@@ -1488,11 +1496,15 @@
if( !b_user_interlaced && info.interlaced )
{
+#if HAVE_INTERLACED
x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, enabling %cff interlaced mode.\n"
" If you want otherwise, use --no-interlaced or --%cff\n",
info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
param->b_interlaced = 1;
param->b_tff = !!info.tff;
+#else
+ x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, but not compiled with interlaced support\n" );
+#endif
}
/* Automatically reduce reference frame count to match the user's target level
@@ -1644,9 +1656,6 @@
double duration;
double pulldown_pts = 0;
int retval = 0;
- char UNUSED originalCTitle[200] = "";
-
- GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
opt->b_progress &= param->i_log_level < X264_LOG_DEBUG;
@@ -1805,7 +1814,5 @@
(double) i_file * 8 / ( 1000 * duration ) );
}
- SetConsoleTitle( originalCTitle );
-
return retval;
}
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/x264.h
^
|
@@ -41,7 +41,7 @@
#include "x264_config.h"
-#define X264_BUILD 114
+#define X264_BUILD 115
/* x264_t:
* opaque handler for encoder */
@@ -162,9 +162,6 @@
#define X264_B_PYRAMID_NORMAL 2
#define X264_KEYINT_MIN_AUTO 0
#define X264_KEYINT_MAX_INFINITE (1<<30)
-#define X264_OPEN_GOP_NONE 0
-#define X264_OPEN_GOP_NORMAL 1
-#define X264_OPEN_GOP_BLURAY 2
static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
@@ -176,7 +173,6 @@
static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
-static const char * const x264_open_gop_names[] = { "none", "normal", "bluray", 0 };
/* Colorspace type */
#define X264_CSP_MASK 0x00ff /* */
@@ -281,7 +277,8 @@
int i_bframe_adaptive;
int i_bframe_bias;
int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
- int i_open_gop; /* Open gop: 1=display order, 2=bluray compatibility braindamage mode */
+ int b_open_gop;
+ int b_bluray_compat;
int b_deblocking_filter;
int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
@@ -385,7 +382,8 @@
/* Cropping Rectangle parameters: added to those implicitly defined by
non-mod16 video resolutions. */
- struct {
+ struct
+ {
unsigned int i_left;
unsigned int i_top;
unsigned int i_right;
@@ -480,7 +478,8 @@
* H.264 level restriction information
****************************************************************************/
-typedef struct {
+typedef struct
+{
int level_idc;
int mbps; /* max macroblock processing rate (macroblocks/sec) */
int frame_size; /* max frame size (macroblocks) */
|
[-]
[+]
|
Changed |
x264-snapshot-20110622-2245.tar.bz2/x264cli.h
^
|
@@ -34,7 +34,7 @@
typedef void *hnd_t;
-static inline int64_t gcd( int64_t a, int64_t b )
+static inline uint64_t gcd( uint64_t a, uint64_t b )
{
while( 1 )
{
@@ -46,7 +46,7 @@
}
}
-static inline int64_t lcm( int64_t a, int64_t b )
+static inline uint64_t lcm( uint64_t a, uint64_t b )
{
return ( a / gcd( a, b ) ) * b;
}
|