File openjpeg-20070821svn-t1-updateflags-x86_64.patch of Package openjpeg
x
1
diff -urN -x '*.orig' -x '*.rej' -x '*~' -x '.*' OpenJPEG.orig/libopenjpeg/t1.c OpenJPEG.patched/libopenjpeg/t1.c
2
--- OpenJPEG.orig/libopenjpeg/t1.c 2007-08-23 05:53:17.000000000 -0500
3
+++ OpenJPEG.patched/libopenjpeg/t1.c 2007-08-23 05:56:33.000000000 -0500
4
5
static char t1_getspb(int f);
6
static short t1_getnmsedec_sig(int x, int bitpos);
7
static short t1_getnmsedec_ref(int x, int bitpos);
8
+#ifdef __amd64__
9
+static INLINE void t1_updateflags(flag_t *flagsp, int s, int stride);
10
+#else
11
static void t1_updateflags(flag_t *flagsp, int s, int stride);
12
+#endif
13
/**
14
Encode significant pass
15
*/
16
17
return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
18
}
19
20
+#ifdef __amd64__
21
+
22
+/* On 64-bit platforms we can update three 16-bit flags with a single 64-bit operation (SWAR: SIMD within a register). */
23
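+/* FIXME: relies on little-endian layout: VEC() puts its first argument in bits 0-15, which is the lowest-addressed flag only on little-endian targets (amd64 is one). */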
24
+
25
+#define VEC(x,y,z) (int64)(x)|((int64)(y)<<16)|((int64)(z)<<32)
26
+
27
+static void t1_updateflags(flag_t *flagsp, int s, int stride) { /* amd64 variant: ORs T1_SIG_x / T1_SGN_x bits
+   into three groups of three consecutive flag words, starting at
+   flagsp[-1 - stride], flagsp[-1] and flagsp[-1 + stride], using one 64-bit
+   read-modify-write per group.
+   flagsp: flag word in the middle of the centre group.
+   s:      table selector; the table has six entries read at s, s+2 and s+4,
+           so s must be 0 or 1. The s = 1 entries additionally carry T1_SGN_x bits.
+   stride: element distance between the start of one group and the next. */
28
+ static const int64 mod[] = { /* one {s = 0, s = 1} pair per group; VEC() packs its arguments into bits 0-15, 16-31 and 32-47 */
29
+ VEC(T1_SIG_SE, T1_SIG_E, T1_SIG_NE),
30
+ VEC(T1_SIG_SE, T1_SIG_E|T1_SGN_E, T1_SIG_NE),
31
+ VEC(T1_SIG_S, T1_SIG, T1_SIG_N),
32
+ VEC(T1_SIG_S|T1_SGN_S, T1_SIG, T1_SIG_N|T1_SGN_N),
33
+ VEC(T1_SIG_SW, T1_SIG_W, T1_SIG_NW),
34
+ VEC(T1_SIG_SW, T1_SIG_W|T1_SGN_W, T1_SIG_NW)
35
+ };
36
+ /* Read 64 bits starting at the first flag word of each group.
+    NOTE(review): this accesses flag_t storage through an int64 pointer (type
+    punning by pointer cast) and nothing here guarantees 8-byte alignment;
+    presumably tolerated because this path is amd64-only. Confirm against the
+    compiler's strict-aliasing settings. */
37
+ int64 tmp1 = *(int64*)((void*)&flagsp[-1 - stride]);
38
+ int64 tmp2 = *(int64*)((void*)&flagsp[-1 ]);
39
+ int64 tmp3 = *(int64*)((void*)&flagsp[-1 + stride]);
40
+ /* OR in the packed masks. Bits 48-63 of each mask are zero only if every
+    T1_x constant fits in 16 bits (TODO confirm), in which case the bits read
+    beyond the third flag word are carried through unchanged. */
41
+ tmp1 |= mod[s];
42
+ tmp2 |= mod[s+2];
43
+ tmp3 |= mod[s+4];
44
+ /* Write each group back. Every store is 64 bits wide, so the flags buffer
+    must remain valid for 64 bits starting at &flagsp[-1 + stride]. */
45
+ *(int64*)((void*)&flagsp[-1 - stride]) = tmp1;
46
+ *(int64*)((void*)&flagsp[-1 ]) = tmp2;
47
+ *(int64*)((void*)&flagsp[-1 + stride]) = tmp3;
48
+}
49
+
50
+#else
51
+
52
static void t1_updateflags(flag_t *flagsp, int s, int stride) {
53
static const flag_t mod[] = {
54
T1_SIG_E, T1_SIG_E|T1_SGN_E,
55
56
flagsp[ 1 + stride] |= T1_SIG_NW;
57
}
58
59
+#endif
60
+
61
static void t1_enc_sigpass_step(
62
opj_t1_t *t1,
63
flag_t *flagsp,
64
65
for (i = 0; i < t1->w; ++i) {
66
if (k + 3 < t1->h) {
67
#ifdef __amd64__
68
+ /* 64 bit SWAR */
69
+ /* FIXME: Assumes little endian? */
70
int64 tmp = *((int64*)&t1->flags[(k+1) + (i+1)*(t1->h+2)]);
71
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
72
tmp &= ~((int64)(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S)<<48);
73
74
memset(t1->data,0,datasize * sizeof(int));
75
76
flagssize=(h+2) * (w+2);
77
+#ifdef __amd64__
78
+ /* t1_updateflags uses 64-bit loads/stores to update three shorts (48 bits),
79
+ so each access also touches a fourth short: reserve one short of headroom. */
80
+ ++flagssize;
81
+#endif
82
83
if(flagssize > t1->flagssize){
84
opj_aligned_free(t1->flags);
85