GDAL
gdalsse_priv.h
1 /******************************************************************************
2  * $Id: gdalsse_priv.h 32173 2015-12-14 00:04:53Z goatbar $
3  *
4  * Project: GDAL
5  * Purpose: SSE2 helper
6  * Author: Even Rouault <even dot rouault at spatialys dot com>
7  *
8  ******************************************************************************
9  * Copyright (c) 2014, Even Rouault <even dot rouault at spatialys dot com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #ifndef GDALSSE_PRIV_H_INCLUDED
31 #define GDALSSE_PRIV_H_INCLUDED
32 
33 #include "cpl_port.h"
34 
35 /* We restrict to 64-bit processors because they are guaranteed to have SSE2 */
36 /* Could possibly be used on 32-bit too, but we would need to check at runtime */
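/* A hedged, illustrative sketch (not part of the original header) of what such a
 * runtime check could look like on a 32-bit build; the helper name
 * CPLHaveRuntimeSSE2 is hypothetical:
 *
 *   #if defined(_MSC_VER)
 *   #include <intrin.h>
 *   #endif
 *
 *   static bool CPLHaveRuntimeSSE2()
 *   {
 *   #if defined(__GNUC__) || defined(__clang__)
 *       return __builtin_cpu_supports("sse2") != 0;
 *   #elif defined(_MSC_VER)
 *       int cpuInfo[4];
 *       __cpuid(cpuInfo, 1);                  // CPUID leaf 1
 *       return (cpuInfo[3] & (1 << 26)) != 0; // EDX bit 26 = SSE2
 *   #else
 *       return false;
 *   #endif
 *   }
 */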
37 #if (defined(__x86_64) || defined(_M_X64)) && !defined(USE_SSE2_EMULATION)
38 
39 /* Requires SSE2 */
40 #include <emmintrin.h>
41 #include <string.h>
42 
43 class XMMReg2Double
44 {
45  public:
46  __m128d xmm;
47 
48  /* coverity[uninit_member] */
49  XMMReg2Double() {}
50 
51  XMMReg2Double(double val) { xmm = _mm_load_sd (&val); }
52  XMMReg2Double(const XMMReg2Double& other) : xmm(other.xmm) {}
53 
54  static inline XMMReg2Double Zero()
55  {
56  XMMReg2Double reg;
57  reg.Zeroize();
58  return reg;
59  }
60 
61  static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
62  {
63  XMMReg2Double reg;
64  reg.nsLoad1ValHighAndLow(ptr);
65  return reg;
66  }
67 
68  static inline XMMReg2Double Load2Val(const double* ptr)
69  {
70  XMMReg2Double reg;
71  reg.nsLoad2Val(ptr);
72  return reg;
73  }
74 
75  static inline XMMReg2Double Load2Val(const float* ptr)
76  {
77  XMMReg2Double reg;
78  reg.nsLoad2Val(ptr);
79  return reg;
80  }
81 
82  static inline XMMReg2Double Load2ValAligned(const double* ptr)
83  {
84  XMMReg2Double reg;
85  reg.nsLoad2ValAligned(ptr);
86  return reg;
87  }
88 
89  static inline XMMReg2Double Load2Val(const unsigned char* ptr)
90  {
91  XMMReg2Double reg;
92  reg.nsLoad2Val(ptr);
93  return reg;
94  }
95 
96  static inline XMMReg2Double Load2Val(const short* ptr)
97  {
98  XMMReg2Double reg;
99  reg.nsLoad2Val(ptr);
100  return reg;
101  }
102 
103  static inline XMMReg2Double Load2Val(const unsigned short* ptr)
104  {
105  XMMReg2Double reg;
106  reg.nsLoad2Val(ptr);
107  return reg;
108  }
109 
110  static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
111  {
112  XMMReg2Double reg;
113  reg.xmm = _mm_cmpeq_pd(expr1.xmm, expr2.xmm);
114  return reg;
115  }
116 
117  static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
118  {
119  XMMReg2Double reg;
120  reg.xmm = _mm_cmpneq_pd(expr1.xmm, expr2.xmm);
121  return reg;
122  }
123 
124  static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
125  {
126  XMMReg2Double reg;
127  reg.xmm = _mm_cmpgt_pd(expr1.xmm, expr2.xmm);
128  return reg;
129  }
130 
131  static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
132  {
133  XMMReg2Double reg;
134  reg.xmm = _mm_and_pd(expr1.xmm, expr2.xmm);
135  return reg;
136  }
137 
138  static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
139  {
140  XMMReg2Double reg;
141  reg.xmm = _mm_or_pd(_mm_and_pd (cond.xmm, true_expr.xmm), _mm_andnot_pd(cond.xmm, false_expr.xmm));
142  return reg;
143  }
144 
145  static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
146  {
147  XMMReg2Double reg;
148  reg.xmm = _mm_min_pd(expr1.xmm, expr2.xmm);
149  return reg;
150  }
151 
152  inline void nsLoad1ValHighAndLow(const double* ptr)
153  {
154  xmm = _mm_load1_pd(ptr);
155  }
156 
157  inline void nsLoad2Val(const double* ptr)
158  {
159  xmm = _mm_loadu_pd(ptr);
160  }
161 
162  inline void nsLoad2ValAligned(const double* pval)
163  {
164  xmm = _mm_load_pd(pval);
165  }
166 
167  inline void nsLoad2Val(const float* pval)
168  {
169  __m128 temp1 = _mm_load_ss(pval);
170  __m128 temp2 = _mm_load_ss(pval + 1);
171  temp1 = _mm_shuffle_ps(temp1, temp2, _MM_SHUFFLE(1,0,1,0));
172  temp1 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,3,2,0));
173  xmm = _mm_cvtps_pd(temp1);
174  }
175 
176  inline void nsLoad2Val(const unsigned char* ptr)
177  {
178 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
179  unsigned short s;
180  memcpy(&s, ptr, 2);
181  __m128i xmm_i = _mm_cvtsi32_si128(s);
182 #else
183  __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned short*)(ptr));
184 #endif
185  xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
186  xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
187  xmm = _mm_cvtepi32_pd(xmm_i);
188  }
189 
190  inline void nsLoad2Val(const short* ptr)
191  {
192  int i;
193  memcpy(&i, ptr, 4);
194  __m128i xmm_i = _mm_cvtsi32_si128(i);
195  xmm_i = _mm_unpacklo_epi16(xmm_i,xmm_i); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|b|b|a|a */
196  xmm_i = _mm_srai_epi32(xmm_i, 16); /* 0|0|0|0|b|b|a|a --> 0|0|0|0|sign(b)|b|sign(a)|a */
197  xmm = _mm_cvtepi32_pd(xmm_i);
198  }
199 
200  inline void nsLoad2Val(const unsigned short* ptr)
201  {
202  int i;
203  memcpy(&i, ptr, 4);
204  __m128i xmm_i = _mm_cvtsi32_si128(i);
205  xmm_i = _mm_unpacklo_epi16(xmm_i,xmm_i); /* 0|0|0|0|0|0|b|a --> 0|0|0|0|b|b|a|a */
206  xmm_i = _mm_srli_epi32(xmm_i, 16); /* 0|0|0|0|b|b|a|a --> 0|0|0|0|0|b|0|a */
207  xmm = _mm_cvtepi32_pd(xmm_i);
208  }
209 
210  static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
211  {
212 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
213  int i;
214  memcpy(&i, ptr, 4);
215  __m128i xmm_i = _mm_cvtsi32_si128(i);
216 #else
217  __m128i xmm_i = _mm_cvtsi32_si128(*(int*)(ptr));
218 #endif
219  xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
220  xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
221  low.xmm = _mm_cvtepi32_pd(xmm_i);
222  high.xmm = _mm_cvtepi32_pd(_mm_shuffle_epi32(xmm_i,_MM_SHUFFLE(3,2,3,2)));
223  }
224 
225  static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
226  {
227  low.nsLoad2Val(ptr);
228  high.nsLoad2Val(ptr+2);
229  }
230 
231  static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
232  {
233  low.nsLoad2Val(ptr);
234  high.nsLoad2Val(ptr+2);
235  }
236 
237  static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
238  {
239  low.nsLoad2Val(ptr);
240  high.nsLoad2Val(ptr+2);
241  }
242 
243  static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
244  {
245  __m128 temp1 = _mm_loadu_ps(ptr);
246  __m128 temp2 = _mm_shuffle_ps(temp1, temp1, _MM_SHUFFLE(3,2,3,2));
247  low.xmm = _mm_cvtps_pd(temp1);
248  high.xmm = _mm_cvtps_pd(temp2);
249  }
250 
251  inline void Zeroize()
252  {
253  xmm = _mm_setzero_pd();
254  }
255 
256  inline XMMReg2Double& operator= (const XMMReg2Double& other)
257  {
258  xmm = other.xmm;
259  return *this;
260  }
261 
262  inline XMMReg2Double& operator+= (const XMMReg2Double& other)
263  {
264  xmm = _mm_add_pd(xmm, other.xmm);
265  return *this;
266  }
267 
268  inline XMMReg2Double& operator*= (const XMMReg2Double& other)
269  {
270  xmm = _mm_mul_pd(xmm, other.xmm);
271  return *this;
272  }
273 
274  inline XMMReg2Double operator+ (const XMMReg2Double& other) const
275  {
276  XMMReg2Double ret;
277  ret.xmm = _mm_add_pd(xmm, other.xmm);
278  return ret;
279  }
280 
281  inline XMMReg2Double operator- (const XMMReg2Double& other) const
282  {
283  XMMReg2Double ret;
284  ret.xmm = _mm_sub_pd(xmm, other.xmm);
285  return ret;
286  }
287 
288  inline XMMReg2Double operator* (const XMMReg2Double& other) const
289  {
290  XMMReg2Double ret;
291  ret.xmm = _mm_mul_pd(xmm, other.xmm);
292  return ret;
293  }
294 
295  inline XMMReg2Double operator/ (const XMMReg2Double& other) const
296  {
297  XMMReg2Double ret;
298  ret.xmm = _mm_div_pd(xmm, other.xmm);
299  return ret;
300  }
301 
302  inline void AddLowAndHigh()
303  {
304  __m128d xmm2;
305  xmm2 = _mm_shuffle_pd(xmm,xmm,_MM_SHUFFLE2(0,1)); /* transfer high word into low word of xmm2 */
306  xmm = _mm_add_pd(xmm, xmm2);
307  }
308 
309  inline void Store2Double(double* pval) const
310  {
311  _mm_storeu_pd(pval, xmm);
312  }
313 
314  inline void Store2DoubleAligned(double* pval) const
315  {
316  _mm_store_pd(pval, xmm);
317  }
318 
319  void Store2Val(unsigned short* ptr) const
320  {
321  __m128i tmp = _mm_cvtpd_epi32(xmm); /* Convert the 2 double values to 2 integers */
322  ptr[0] = (GUInt16)_mm_extract_epi16(tmp, 0);
323  ptr[1] = (GUInt16)_mm_extract_epi16(tmp, 2);
324  }
325 
326  inline operator double () const
327  {
328  double val;
329  _mm_store_sd(&val, xmm);
330  return val;
331  }
332 };
333 
334 #else
335 
336 #warning "Software emulation of SSE2 !"
337 
338 class XMMReg2Double
339 {
340  public:
341  double low;
342  double high;
343 
344  XMMReg2Double() {}
345  XMMReg2Double(double val) { low = val; high = 0.0; }
346  XMMReg2Double(const XMMReg2Double& other) : low(other.low), high(other.high) {}
347 
348  static inline XMMReg2Double Zero()
349  {
350  XMMReg2Double reg;
351  reg.Zeroize();
352  return reg;
353  }
354 
355  static inline XMMReg2Double Load1ValHighAndLow(const double* ptr)
356  {
357  XMMReg2Double reg;
358  reg.nsLoad1ValHighAndLow(ptr);
359  return reg;
360  }
361 
362  static inline XMMReg2Double Equals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
363  {
364  XMMReg2Double reg;
365 
366  if (expr1.low == expr2.low)
367  memset(&(reg.low), 0xFF, sizeof(double));
368  else
369  reg.low = 0;
370 
371  if (expr1.high == expr2.high)
372  memset(&(reg.high), 0xFF, sizeof(double));
373  else
374  reg.high = 0;
375 
376  return reg;
377  }
378 
379  static inline XMMReg2Double NotEquals(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
380  {
381  XMMReg2Double reg;
382 
383  if (expr1.low != expr2.low)
384  memset(&(reg.low), 0xFF, sizeof(double));
385  else
386  reg.low = 0;
387 
388  if (expr1.high != expr2.high)
389  memset(&(reg.high), 0xFF, sizeof(double));
390  else
391  reg.high = 0;
392 
393  return reg;
394  }
395 
396  static inline XMMReg2Double Greater(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
397  {
398  XMMReg2Double reg;
399 
400  if (expr1.low > expr2.low)
401  memset(&(reg.low), 0xFF, sizeof(double));
402  else
403  reg.low = 0;
404 
405  if (expr1.high > expr2.high)
406  memset(&(reg.high), 0xFF, sizeof(double));
407  else
408  reg.high = 0;
409 
410  return reg;
411  }
412 
413  static inline XMMReg2Double And(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
414  {
415  XMMReg2Double reg;
416  int low1[2], high1[2];
417  int low2[2], high2[2];
418  memcpy(low1, &expr1.low, sizeof(double));
419  memcpy(high1, &expr1.high, sizeof(double));
420  memcpy(low2, &expr2.low, sizeof(double));
421  memcpy(high2, &expr2.high, sizeof(double));
422  low1[0] &= low2[0];
423  low1[1] &= low2[1];
424  high1[0] &= high2[0];
425  high1[1] &= high2[1];
426  memcpy(&reg.low, low1, sizeof(double));
427  memcpy(&reg.high, high1, sizeof(double));
428  return reg;
429  }
430 
431  static inline XMMReg2Double Ternary(const XMMReg2Double& cond, const XMMReg2Double& true_expr, const XMMReg2Double& false_expr)
432  {
433  XMMReg2Double reg;
434  if( cond.low )
435  reg.low = true_expr.low;
436  else
437  reg.low = false_expr.low;
438  if( cond.high )
439  reg.high = true_expr.high;
440  else
441  reg.high = false_expr.high;
442  return reg;
443  }
444 
445  static inline XMMReg2Double Min(const XMMReg2Double& expr1, const XMMReg2Double& expr2)
446  {
447  XMMReg2Double reg;
448  reg.low = (expr1.low < expr2.low) ? expr1.low : expr2.low;
449  reg.high = (expr1.high < expr2.high) ? expr1.high : expr2.high;
450  return reg;
451  }
452 
453  static inline XMMReg2Double Load2Val(const double* ptr)
454  {
455  XMMReg2Double reg;
456  reg.nsLoad2Val(ptr);
457  return reg;
458  }
459 
460  static inline XMMReg2Double Load2ValAligned(const double* ptr)
461  {
462  XMMReg2Double reg;
463  reg.nsLoad2ValAligned(ptr);
464  return reg;
465  }
466 
467  static inline XMMReg2Double Load2Val(const float* ptr)
468  {
469  XMMReg2Double reg;
470  reg.nsLoad2Val(ptr);
471  return reg;
472  }
473 
474  static inline XMMReg2Double Load2Val(const unsigned char* ptr)
475  {
476  XMMReg2Double reg;
477  reg.nsLoad2Val(ptr);
478  return reg;
479  }
480 
481  static inline XMMReg2Double Load2Val(const short* ptr)
482  {
483  XMMReg2Double reg;
484  reg.nsLoad2Val(ptr);
485  return reg;
486  }
487 
488  static inline XMMReg2Double Load2Val(const unsigned short* ptr)
489  {
490  XMMReg2Double reg;
491  reg.nsLoad2Val(ptr);
492  return reg;
493  }
494 
495  inline void nsLoad1ValHighAndLow(const double* pval)
496  {
497  low = pval[0];
498  high = pval[0];
499  }
500 
501  inline void nsLoad2Val(const double* pval)
502  {
503  low = pval[0];
504  high = pval[1];
505  }
506 
507  inline void nsLoad2ValAligned(const double* pval)
508  {
509  low = pval[0];
510  high = pval[1];
511  }
512 
513  inline void nsLoad2Val(const float* pval)
514  {
515  low = pval[0];
516  high = pval[1];
517  }
518 
519  inline void nsLoad2Val(const unsigned char* ptr)
520  {
521  low = ptr[0];
522  high = ptr[1];
523  }
524 
525  inline void nsLoad2Val(const short* ptr)
526  {
527  low = ptr[0];
528  high = ptr[1];
529  }
530 
531  inline void nsLoad2Val(const unsigned short* ptr)
532  {
533  low = ptr[0];
534  high = ptr[1];
535  }
536 
537  static inline void Load4Val(const unsigned char* ptr, XMMReg2Double& low, XMMReg2Double& high)
538  {
539  low.low = ptr[0];
540  low.high = ptr[1];
541  high.low = ptr[2];
542  high.high = ptr[3];
543  }
544 
545  static inline void Load4Val(const short* ptr, XMMReg2Double& low, XMMReg2Double& high)
546  {
547  low.nsLoad2Val(ptr);
548  high.nsLoad2Val(ptr+2);
549  }
550 
551  static inline void Load4Val(const unsigned short* ptr, XMMReg2Double& low, XMMReg2Double& high)
552  {
553  low.nsLoad2Val(ptr);
554  high.nsLoad2Val(ptr+2);
555  }
556 
557  static inline void Load4Val(const double* ptr, XMMReg2Double& low, XMMReg2Double& high)
558  {
559  low.nsLoad2Val(ptr);
560  high.nsLoad2Val(ptr+2);
561  }
562 
563  static inline void Load4Val(const float* ptr, XMMReg2Double& low, XMMReg2Double& high)
564  {
565  low.nsLoad2Val(ptr);
566  high.nsLoad2Val(ptr+2);
567  }
568 
569  inline void Zeroize()
570  {
571  low = 0.0;
572  high = 0.0;
573  }
574 
575  inline XMMReg2Double& operator= (const XMMReg2Double& other)
576  {
577  low = other.low;
578  high = other.high;
579  return *this;
580  }
581 
582  inline XMMReg2Double& operator+= (const XMMReg2Double& other)
583  {
584  low += other.low;
585  high += other.high;
586  return *this;
587  }
588 
589  inline XMMReg2Double& operator*= (const XMMReg2Double& other)
590  {
591  low *= other.low;
592  high *= other.high;
593  return *this;
594  }
595 
596  inline XMMReg2Double operator+ (const XMMReg2Double& other) const
597  {
598  XMMReg2Double ret;
599  ret.low = low + other.low;
600  ret.high = high + other.high;
601  return ret;
602  }
603 
604  inline XMMReg2Double operator- (const XMMReg2Double& other) const
605  {
606  XMMReg2Double ret;
607  ret.low = low - other.low;
608  ret.high = high - other.high;
609  return ret;
610  }
611 
612  inline XMMReg2Double operator* (const XMMReg2Double& other) const
613  {
614  XMMReg2Double ret;
615  ret.low = low * other.low;
616  ret.high = high * other.high;
617  return ret;
618  }
619 
620  inline XMMReg2Double operator/ (const XMMReg2Double& other) const
621  {
622  XMMReg2Double ret;
623  ret.low = low / other.low;
624  ret.high = high / other.high;
625  return ret;
626  }
627 
628  inline void AddLowAndHigh()
629  {
630  double add = low + high;
631  low = add;
632  high = add;
633  }
634 
635  inline void Store2Double(double* pval) const
636  {
637  pval[0] = low;
638  pval[1] = high;
639  }
640 
641  inline void Store2DoubleAligned(double* pval) const
642  {
643  pval[0] = low;
644  pval[1] = high;
645  }
646 
647  void Store2Val(unsigned short* ptr) const
648  {
649  ptr[0] = (GUInt16)low;
650  ptr[1] = (GUInt16)high;
651  }
652 
653  inline operator double () const
654  {
655  return low;
656  }
657 };
658 
659 #endif /* defined(__x86_64) || defined(_M_X64) */
660 
661 class XMMReg4Double
662 {
663  public:
664  XMMReg2Double low, high;
665 
666  XMMReg4Double() {}
667  XMMReg4Double(const XMMReg4Double& other) : low(other.low), high(other.high) {}
668 
669  static inline XMMReg4Double Zero()
670  {
671  XMMReg4Double reg;
672  reg.low.Zeroize();
673  reg.high.Zeroize();
674  return reg;
675  }
676 
677  static inline XMMReg4Double Load1ValHighAndLow(const double* ptr)
678  {
679  XMMReg4Double reg;
680  reg.low.nsLoad1ValHighAndLow(ptr);
681  reg.high = reg.low;
682  return reg;
683  }
684 
685  static inline XMMReg4Double Load4Val(const unsigned char* ptr)
686  {
687  XMMReg4Double reg;
688  XMMReg2Double::Load4Val(ptr, reg.low, reg.high);
689  return reg;
690  }
691 
692  static inline XMMReg4Double Load4Val(const short* ptr)
693  {
694  XMMReg4Double reg;
695  reg.low.nsLoad2Val(ptr);
696  reg.high.nsLoad2Val(ptr+2);
697  return reg;
698  }
699 
700  static inline XMMReg4Double Load4Val(const unsigned short* ptr)
701  {
702  XMMReg4Double reg;
703  reg.low.nsLoad2Val(ptr);
704  reg.high.nsLoad2Val(ptr+2);
705  return reg;
706  }
707 
708  static inline XMMReg4Double Load4Val(const double* ptr)
709  {
710  XMMReg4Double reg;
711  reg.low.nsLoad2Val(ptr);
712  reg.high.nsLoad2Val(ptr+2);
713  return reg;
714  }
715 
716  static inline XMMReg4Double Load4ValAligned(const double* ptr)
717  {
718  XMMReg4Double reg;
719  reg.low.nsLoad2ValAligned(ptr);
720  reg.high.nsLoad2ValAligned(ptr+2);
721  return reg;
722  }
723 
724  static inline XMMReg4Double Load4Val(const float* ptr)
725  {
726  XMMReg4Double reg;
727  XMMReg2Double::Load4Val(ptr, reg.low, reg.high);
728  return reg;
729  }
730 
731  static inline XMMReg4Double Equals(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
732  {
733  XMMReg4Double reg;
734  reg.low = XMMReg2Double::Equals(expr1.low, expr2.low);
735  reg.high = XMMReg2Double::Equals(expr1.high, expr2.high);
736  return reg;
737  }
738 
739  static inline XMMReg4Double NotEquals(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
740  {
741  XMMReg4Double reg;
742  reg.low = XMMReg2Double::NotEquals(expr1.low, expr2.low);
743  reg.high = XMMReg2Double::NotEquals(expr1.high, expr2.high);
744  return reg;
745  }
746 
747  static inline XMMReg4Double Greater(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
748  {
749  XMMReg4Double reg;
750  reg.low = XMMReg2Double::Greater(expr1.low, expr2.low);
751  reg.high = XMMReg2Double::Greater(expr1.high, expr2.high);
752  return reg;
753  }
754 
755  static inline XMMReg4Double And(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
756  {
757  XMMReg4Double reg;
758  reg.low = XMMReg2Double::And(expr1.low, expr2.low);
759  reg.high = XMMReg2Double::And(expr1.high, expr2.high);
760  return reg;
761  }
762 
763  static inline XMMReg4Double Ternary(const XMMReg4Double& cond, const XMMReg4Double& true_expr, const XMMReg4Double& false_expr)
764  {
765  XMMReg4Double reg;
766  reg.low = XMMReg2Double::Ternary(cond.low, true_expr.low, false_expr.low);
767  reg.high = XMMReg2Double::Ternary(cond.high, true_expr.high, false_expr.high);
768  return reg;
769  }
770 
771  static inline XMMReg4Double Min(const XMMReg4Double& expr1, const XMMReg4Double& expr2)
772  {
773  XMMReg4Double reg;
774  reg.low = XMMReg2Double::Min(expr1.low, expr2.low);
775  reg.high = XMMReg2Double::Min(expr1.high, expr2.high);
776  return reg;
777  }
778 
779  inline XMMReg4Double& operator= (const XMMReg4Double& other)
780  {
781  low = other.low;
782  high = other.high;
783  return *this;
784  }
785 
786  inline XMMReg4Double& operator+= (const XMMReg4Double& other)
787  {
788  low += other.low;
789  high += other.high;
790  return *this;
791  }
792 
793  inline XMMReg4Double& operator*= (const XMMReg4Double& other)
794  {
795  low *= other.low;
796  high *= other.high;
797  return *this;
798  }
799 
800  inline XMMReg4Double operator+ (const XMMReg4Double& other) const
801  {
802  XMMReg4Double ret;
803  ret.low = low + other.low;
804  ret.high = high + other.high;
805  return ret;
806  }
807 
808  inline XMMReg4Double operator- (const XMMReg4Double& other) const
809  {
810  XMMReg4Double ret;
811  ret.low = low - other.low;
812  ret.high = high - other.high;
813  return ret;
814  }
815 
816  inline XMMReg4Double operator* (const XMMReg4Double& other) const
817  {
818  XMMReg4Double ret;
819  ret.low = low * other.low;
820  ret.high = high * other.high;
821  return ret;
822  }
823 
824  inline XMMReg4Double operator/ (const XMMReg4Double& other) const
825  {
826  XMMReg4Double ret;
827  ret.low = low / other.low;
828  ret.high = high / other.high;
829  return ret;
830  }
831 
832  inline void AddLowAndHigh()
833  {
834  low = low + high;
835  low.AddLowAndHigh();
836  }
837 
838  inline XMMReg2Double& GetLow()
839  {
840  return low;
841  }
842 
843  inline XMMReg2Double& GetHigh()
844  {
845  return high;
846  }
847 
848  void Store4Val(unsigned short* ptr) const
849  {
850  low.Store2Val(ptr);
851  high.Store2Val(ptr+2);
852  }
853 };
854 
855 #endif /* GDALSSE_PRIV_H_INCLUDED */
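
A minimal usage sketch (not part of the header above), assuming gdalsse_priv.h is on the include path: XMMReg4Double processes four doubles per iteration, and AddLowAndHigh() then folds the partial sums into a single value. The function and parameter names (SumOfProducts, padfX, padfY, nCount) are hypothetical, and nCount is assumed to be a multiple of 4.

#include "gdalsse_priv.h"

static double SumOfProducts(const double* padfX, const double* padfY, int nCount)
{
    XMMReg4Double oAcc = XMMReg4Double::Zero();
    for( int i = 0; i < nCount; i += 4 )
    {
        /* Load 4 doubles from each input and accumulate their element-wise products */
        oAcc += XMMReg4Double::Load4Val(padfX + i) * XMMReg4Double::Load4Val(padfY + i);
    }
    /* Collapse the 4 partial sums into one scalar (left in both lanes of the low register) */
    oAcc.AddLowAndHigh();
    return (double)oAcc.GetLow();
}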