Logoj0ke.net Open Build Service > Projects > multimedia:SL11 > openjpeg > openjpeg-20070821svn-t1-updateflags-x86_64.patch
Sign Up | Log In

File openjpeg-20070821svn-t1-updateflags-x86_64.patch of Package openjpeg

x
 
1
diff -urN -x '*.orig' -x '*.rej' -x '*~' -x '.*' OpenJPEG.orig/libopenjpeg/t1.c OpenJPEG.patched/libopenjpeg/t1.c
2
--- OpenJPEG.orig/libopenjpeg/t1.c  2007-08-23 05:53:17.000000000 -0500
3
+++ OpenJPEG.patched/libopenjpeg/t1.c   2007-08-23 05:56:33.000000000 -0500
4
@@ -45,7 +45,11 @@
5
 static char t1_getspb(int f);
6
 static short t1_getnmsedec_sig(int x, int bitpos);
7
 static short t1_getnmsedec_ref(int x, int bitpos);
8
+#ifdef __amd64__
9
+static INLINE void t1_updateflags(flag_t *flagsp, int s, int stride);
10
+#else
11
 static void t1_updateflags(flag_t *flagsp, int s, int stride);
12
+#endif
13
 /**
14
 Encode significant pass
15
 */
16
@@ -258,6 +262,38 @@
17
     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
18
 }
19
 
20
+#ifdef __amd64__
21
+
22
+/* On 64 bit platforms we can set three flags at a time. (SWAR) */
23
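+/* NOTE: VEC() puts its first argument in the low 16 bits, so it lands on the lowest-addressed flag only with little-endian byte order (always the case on amd64). */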
24
+
25
+#define VEC(x,y,z) (int64)(x)|((int64)(y)<<16)|((int64)(z)<<32)
26
+
27
+static void t1_updateflags(flag_t *flagsp, int s, int stride) {
28
+   static const int64 mod[] = {
29
+       VEC(T1_SIG_SE,         T1_SIG_E,          T1_SIG_NE),
30
+       VEC(T1_SIG_SE,         T1_SIG_E|T1_SGN_E, T1_SIG_NE),
31
+       VEC(T1_SIG_S,          T1_SIG,            T1_SIG_N),
32
+       VEC(T1_SIG_S|T1_SGN_S, T1_SIG,            T1_SIG_N|T1_SGN_N),
33
+       VEC(T1_SIG_SW,         T1_SIG_W,          T1_SIG_NW),
34
+       VEC(T1_SIG_SW,         T1_SIG_W|T1_SGN_W, T1_SIG_NW)
35
+   };
36
+
37
+   int64 tmp1 = *(int64*)((void*)&flagsp[-1 - stride]);
38
+   int64 tmp2 = *(int64*)((void*)&flagsp[-1         ]);
39
+   int64 tmp3 = *(int64*)((void*)&flagsp[-1 + stride]);
40
+
41
+   tmp1 |= mod[s];
42
+   tmp2 |= mod[s+2];
43
+   tmp3 |= mod[s+4];
44
+
45
+   *(int64*)((void*)&flagsp[-1 - stride]) = tmp1;
46
+   *(int64*)((void*)&flagsp[-1         ]) = tmp2;
47
+   *(int64*)((void*)&flagsp[-1 + stride]) = tmp3;
48
+}
49
+
50
+#else
51
+
52
 static void t1_updateflags(flag_t *flagsp, int s, int stride) {
53
    static const flag_t mod[] = {
54
        T1_SIG_E, T1_SIG_E|T1_SGN_E,
55
@@ -279,6 +315,8 @@
56
    flagsp[ 1 + stride] |= T1_SIG_NW;
57
 }
58
 
59
+#endif
60
+
61
 static void t1_enc_sigpass_step(
62
        opj_t1_t *t1,
63
        flag_t *flagsp,
64
@@ -670,6 +708,8 @@
65
        for (i = 0; i < t1->w; ++i) {
66
            if (k + 3 < t1->h) {
67
 #ifdef __amd64__
68
+               /* 64 bit SWAR */
69
+               /* NOTE: relies on little-endian byte order (true on amd64): the <<48 mask below targets the fourth 16-bit flag of the 64-bit load. */
70
                int64 tmp = *((int64*)&t1->flags[(k+1) + (i+1)*(t1->h+2)]);
71
                if (cblksty & J2K_CCP_CBLKSTY_VSC) {
72
                    tmp &= ~((int64)(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S)<<48);
73
@@ -780,6 +820,11 @@
74
    memset(t1->data,0,datasize * sizeof(int));
75
 
76
    flagssize=(h+2) * (w+2);
77
+#ifdef __amd64__
78
+   /* The 64 bit SWAR code in t1_updateflags accesses four shorts (64 bits) at
79
+      once but only needs three (48 bits), so add one short of headroom. */
80
+   ++flagssize;
81
+#endif
82
 
83
    if(flagssize > t1->flagssize){
84
        opj_aligned_free(t1->flags);
85