LLVM OpenMP* Runtime Library
kmp_atomic.h
1 /*
2  * kmp_atomic.h - ATOMIC header file
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #ifndef KMP_ATOMIC_H
17 #define KMP_ATOMIC_H
18 
19 #include "kmp_os.h"
20 #include "kmp_lock.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 // C++ build port.
27 // Intel compiler does not support _Complex datatype on win.
28 // Intel compiler supports _Complex datatype on lin and mac.
29 // On the other hand, there is a stack-alignment problem on lin_32 and mac_32
30 // if the rhs is a cmplx80 or cmplx128 typedef'ed datatype.
31 // The decision is: use the compiler-supported _Complex type on lin and mac,
32 //                  and the typedef'ed types on win.
33 // Condition for WIN64 was modified in anticipation of 10.1 build compiler.
34 
35 #if defined( __cplusplus ) && ( KMP_OS_WINDOWS )
 36  // create C++ wrapper types that stand in for the C99 complex types
37 
38  #if (_MSC_VER < 1600) && defined(_DEBUG)
39  // Workaround for the problem of _DebugHeapTag unresolved external.
 40  // This problem prevented us from using our static debug library for C tests
 41  // compiled with the /MDd option (the library itself is built with /MTd).
42  #undef _DEBUG
43  #define _DEBUG_TEMPORARILY_UNSET_
44  #endif
45 
46  #include <complex>
47 
48  template< typename type_lhs, typename type_rhs >
49  std::complex< type_lhs > __kmp_lhs_div_rhs(
50  const std::complex< type_lhs >& lhs,
51  const std::complex< type_rhs >& rhs ) {
52  type_lhs a = lhs.real();
53  type_lhs b = lhs.imag();
54  type_rhs c = rhs.real();
55  type_rhs d = rhs.imag();
56  type_rhs den = c*c + d*d;
57  type_rhs r = ( a*c + b*d );
58  type_rhs i = ( b*c - a*d );
59  std::complex< type_lhs > ret( r/den, i/den );
60  return ret;
61  }
62 
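 // Usage sketch for the helper above (illustrative only, not part of the
 // original header). It implements (a+bi)/(c+di) = ((ac+bd) + (bc-ad)i)/(c*c + d*d):
 //     std::complex< double > num( 1.0, 2.0 );                    // 1 + 2i
 //     std::complex< double > den( 3.0, 4.0 );                    // 3 + 4i
 //     std::complex< double > q = __kmp_lhs_div_rhs( num, den );  // 0.44 + 0.08i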
63  // complex8
64  struct __kmp_cmplx64_t : std::complex< double > {
65 
66  __kmp_cmplx64_t() : std::complex< double > () {}
67 
68  __kmp_cmplx64_t( const std::complex< double >& cd )
69  : std::complex< double > ( cd ) {}
70 
71  void operator /= ( const __kmp_cmplx64_t& rhs ) {
72  std::complex< double > lhs = *this;
73  *this = __kmp_lhs_div_rhs( lhs, rhs );
74  }
75 
76  __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) {
77  std::complex< double > lhs = *this;
78  return __kmp_lhs_div_rhs( lhs, rhs );
79  }
80 
81  };
82  typedef struct __kmp_cmplx64_t kmp_cmplx64;
83 
84  // complex4
85  struct __kmp_cmplx32_t : std::complex< float > {
86 
87  __kmp_cmplx32_t() : std::complex< float > () {}
88 
89  __kmp_cmplx32_t( const std::complex<float>& cf )
90  : std::complex< float > ( cf ) {}
91 
92  __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) {
93  std::complex< float > lhs = *this;
94  std::complex< float > rhs = b;
95  return ( lhs + rhs );
96  }
97  __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) {
98  std::complex< float > lhs = *this;
99  std::complex< float > rhs = b;
100  return ( lhs - rhs );
101  }
102  __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) {
103  std::complex< float > lhs = *this;
104  std::complex< float > rhs = b;
105  return ( lhs * rhs );
106  }
107 
108  __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) {
109  kmp_cmplx64 t = kmp_cmplx64( *this ) + b;
110  std::complex< double > d( t );
111  std::complex< float > f( d );
112  __kmp_cmplx32_t r( f );
113  return r;
114  }
115  __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) {
116  kmp_cmplx64 t = kmp_cmplx64( *this ) - b;
117  std::complex< double > d( t );
118  std::complex< float > f( d );
119  __kmp_cmplx32_t r( f );
120  return r;
121  }
122  __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) {
123  kmp_cmplx64 t = kmp_cmplx64( *this ) * b;
124  std::complex< double > d( t );
125  std::complex< float > f( d );
126  __kmp_cmplx32_t r( f );
127  return r;
128  }
129 
130  void operator /= ( const __kmp_cmplx32_t& rhs ) {
131  std::complex< float > lhs = *this;
132  *this = __kmp_lhs_div_rhs( lhs, rhs );
133  }
134 
135  __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) {
136  std::complex< float > lhs = *this;
137  return __kmp_lhs_div_rhs( lhs, rhs );
138  }
139 
140  void operator /= ( const kmp_cmplx64& rhs ) {
141  std::complex< float > lhs = *this;
142  *this = __kmp_lhs_div_rhs( lhs, rhs );
143  }
144 
145  __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) {
146  std::complex< float > lhs = *this;
147  return __kmp_lhs_div_rhs( lhs, rhs );
148  }
149  };
150  typedef struct __kmp_cmplx32_t kmp_cmplx32;
151 
152  // complex10
153  struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > {
154 
155  __kmp_cmplx80_t() : std::complex< long double > () {}
156 
157  __kmp_cmplx80_t( const std::complex< long double >& cld )
158  : std::complex< long double > ( cld ) {}
159 
160  void operator /= ( const __kmp_cmplx80_t& rhs ) {
161  std::complex< long double > lhs = *this;
162  *this = __kmp_lhs_div_rhs( lhs, rhs );
163  }
164 
165  __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) {
166  std::complex< long double > lhs = *this;
167  return __kmp_lhs_div_rhs( lhs, rhs );
168  }
169 
170  };
171  typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80;
172 
173  // complex16
174  #if KMP_HAVE_QUAD
175  struct __kmp_cmplx128_t : std::complex< _Quad > {
176 
177  __kmp_cmplx128_t() : std::complex< _Quad > () {}
178 
179  __kmp_cmplx128_t( const std::complex< _Quad >& cq )
180  : std::complex< _Quad > ( cq ) {}
181 
182  void operator /= ( const __kmp_cmplx128_t& rhs ) {
183  std::complex< _Quad > lhs = *this;
184  *this = __kmp_lhs_div_rhs( lhs, rhs );
185  }
186 
187  __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) {
188  std::complex< _Quad > lhs = *this;
189  return __kmp_lhs_div_rhs( lhs, rhs );
190  }
191 
192  };
193  typedef struct __kmp_cmplx128_t kmp_cmplx128;
194  #endif /* KMP_HAVE_QUAD */
195 
196  #ifdef _DEBUG_TEMPORARILY_UNSET_
197  #undef _DEBUG_TEMPORARILY_UNSET_
198  // Set it back now
199  #define _DEBUG 1
200  #endif
201 
202 #else
203  // create shortcuts for c99 complex types
204  typedef float _Complex kmp_cmplx32;
205  typedef double _Complex kmp_cmplx64;
206  typedef long double _Complex kmp_cmplx80;
207  #if KMP_HAVE_QUAD
208  typedef _Quad _Complex kmp_cmplx128;
209  #endif
210 #endif
211 
212 // Compiler 12.0 changed the alignment of 16- and 32-byte arguments (like _Quad
213 // and kmp_cmplx128) on IA-32 architecture. The following aligned structures
214 // are implemented to support the old alignment in 10.1, 11.0, 11.1 and
215 // introduce the new alignment in 12.0. See CQ88405.
216 #if KMP_ARCH_X86 && KMP_HAVE_QUAD
217 
218  // 4-byte aligned structures for backward compatibility.
219 
220  #pragma pack( push, 4 )
221 
222 
223  struct KMP_DO_ALIGN( 4 ) Quad_a4_t {
224  _Quad q;
225 
226  Quad_a4_t( ) : q( ) {}
227  Quad_a4_t( const _Quad & cq ) : q ( cq ) {}
228 
229  Quad_a4_t operator + ( const Quad_a4_t& b ) {
230  _Quad lhs = (*this).q;
231  _Quad rhs = b.q;
232  return (Quad_a4_t)( lhs + rhs );
233  }
234 
235  Quad_a4_t operator - ( const Quad_a4_t& b ) {
236  _Quad lhs = (*this).q;
237  _Quad rhs = b.q;
238  return (Quad_a4_t)( lhs - rhs );
239  }
240  Quad_a4_t operator * ( const Quad_a4_t& b ) {
241  _Quad lhs = (*this).q;
242  _Quad rhs = b.q;
243  return (Quad_a4_t)( lhs * rhs );
244  }
245 
246  Quad_a4_t operator / ( const Quad_a4_t& b ) {
247  _Quad lhs = (*this).q;
248  _Quad rhs = b.q;
249  return (Quad_a4_t)( lhs / rhs );
250  }
251 
252  };
253 
254  struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t {
255  kmp_cmplx128 q;
256 
257  kmp_cmplx128_a4_t() : q () {}
258 
259  kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
260 
261  kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) {
262  kmp_cmplx128 lhs = (*this).q;
263  kmp_cmplx128 rhs = b.q;
264  return (kmp_cmplx128_a4_t)( lhs + rhs );
265  }
266  kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) {
267  kmp_cmplx128 lhs = (*this).q;
268  kmp_cmplx128 rhs = b.q;
269  return (kmp_cmplx128_a4_t)( lhs - rhs );
270  }
271  kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) {
272  kmp_cmplx128 lhs = (*this).q;
273  kmp_cmplx128 rhs = b.q;
274  return (kmp_cmplx128_a4_t)( lhs * rhs );
275  }
276 
277  kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) {
278  kmp_cmplx128 lhs = (*this).q;
279  kmp_cmplx128 rhs = b.q;
280  return (kmp_cmplx128_a4_t)( lhs / rhs );
281  }
282 
283  };
284 
285  #pragma pack( pop )
286 
287  // New 16-byte aligned structures for 12.0 compiler.
288  struct KMP_DO_ALIGN( 16 ) Quad_a16_t {
289  _Quad q;
290 
291  Quad_a16_t( ) : q( ) {}
292  Quad_a16_t( const _Quad & cq ) : q ( cq ) {}
293 
294  Quad_a16_t operator + ( const Quad_a16_t& b ) {
295  _Quad lhs = (*this).q;
296  _Quad rhs = b.q;
297  return (Quad_a16_t)( lhs + rhs );
298  }
299 
300  Quad_a16_t operator - ( const Quad_a16_t& b ) {
301  _Quad lhs = (*this).q;
302  _Quad rhs = b.q;
303  return (Quad_a16_t)( lhs - rhs );
304  }
305  Quad_a16_t operator * ( const Quad_a16_t& b ) {
306  _Quad lhs = (*this).q;
307  _Quad rhs = b.q;
308  return (Quad_a16_t)( lhs * rhs );
309  }
310 
311  Quad_a16_t operator / ( const Quad_a16_t& b ) {
312  _Quad lhs = (*this).q;
313  _Quad rhs = b.q;
314  return (Quad_a16_t)( lhs / rhs );
315  }
316  };
317 
318  struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t {
319  kmp_cmplx128 q;
320 
321  kmp_cmplx128_a16_t() : q () {}
322 
323  kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
324 
325  kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) {
326  kmp_cmplx128 lhs = (*this).q;
327  kmp_cmplx128 rhs = b.q;
328  return (kmp_cmplx128_a16_t)( lhs + rhs );
329  }
330  kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) {
331  kmp_cmplx128 lhs = (*this).q;
332  kmp_cmplx128 rhs = b.q;
333  return (kmp_cmplx128_a16_t)( lhs - rhs );
334  }
335  kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) {
336  kmp_cmplx128 lhs = (*this).q;
337  kmp_cmplx128 rhs = b.q;
338  return (kmp_cmplx128_a16_t)( lhs * rhs );
339  }
340 
341  kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) {
342  kmp_cmplx128 lhs = (*this).q;
343  kmp_cmplx128 rhs = b.q;
344  return (kmp_cmplx128_a16_t)( lhs / rhs );
345  }
346  };
347 
348 #endif
349 
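// Compile-time sanity sketch for the wrapper families defined above (an
// illustration, not part of this header; it assumes a compiler that provides
// _Quad and C++11 alignof):
//     static_assert( sizeof( Quad_a4_t ) == sizeof( _Quad ), "no padding added" );
//     static_assert( alignof( Quad_a16_t ) == 16, "new 16-byte alignment" );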
350 #if ( KMP_ARCH_X86 )
351  #define QUAD_LEGACY Quad_a4_t
352  #define CPLX128_LEG kmp_cmplx128_a4_t
353 #else
354  #define QUAD_LEGACY _Quad
355  #define CPLX128_LEG kmp_cmplx128
356 #endif
357 
358 #ifdef __cplusplus
359  extern "C" {
360 #endif
361 
362 extern int __kmp_atomic_mode;
363 
364 //
365 // Atomic locks can easily become contended, so we use queuing locks for them.
366 //
367 
368 typedef kmp_queuing_lock_t kmp_atomic_lock_t;
369 
370 static inline void
371 __kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
372 {
373 #if OMPT_SUPPORT && OMPT_TRACE
374  if (ompt_enabled &&
375  ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
376  ompt_callbacks.ompt_callback(ompt_event_wait_atomic)(
377  (ompt_wait_id_t) lck);
378  }
379 #endif
380 
381  __kmp_acquire_queuing_lock( lck, gtid );
382 
383 #if OMPT_SUPPORT && OMPT_TRACE
384  if (ompt_enabled &&
385  ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
386  ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
387  (ompt_wait_id_t) lck);
388  }
389 #endif
390 }
391 
392 static inline int
393 __kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
394 {
395  return __kmp_test_queuing_lock( lck, gtid );
396 }
397 
398 static inline void
399 __kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
400 {
401  __kmp_release_queuing_lock( lck, gtid );
402 #if OMPT_SUPPORT && OMPT_BLAME
403  if (ompt_enabled &&
404  ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
405  ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
406  (ompt_wait_id_t) lck);
407  }
408 #endif
409 }
410 
411 static inline void
412 __kmp_init_atomic_lock( kmp_atomic_lock_t *lck )
413 {
414  __kmp_init_queuing_lock( lck );
415 }
416 
417 static inline void
418 __kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck )
419 {
420  __kmp_destroy_queuing_lock( lck );
421 }
422 
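// Illustrative sketch (not part of the original header): the typical pattern
// used by kmp_atomic.c to guard an update that has no native hardware atomic.
// The function name is hypothetical.
static inline void
__kmp_example_locked_add( kmp_atomic_lock_t *lck, kmp_int32 gtid,
                          long double *lhs, long double rhs )
{
    __kmp_acquire_atomic_lock( lck, gtid );   // queue up until the lock is owned
    *lhs += rhs;                              // the protected update
    __kmp_release_atomic_lock( lck, gtid );   // hand the lock to the next waiter
}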
423 // Global Locks
424 
425 extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
426 extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
427 extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
428 extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
429 extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
430 extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
431 extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
432 extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for 8-byte complex (float complex) data type */
433 extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
434 extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
435 extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
436 extern kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
437 extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
438 
439 //
440 // Below routines for atomic UPDATE are listed
441 //
442 
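// Illustrative lowering (a sketch, not verbatim compiler output): for
//     #pragma omp atomic
//     x += y;                                // kmp_int32 x; kmp_int32 y;
// a compiler targeting this runtime may emit a call of the form
//     __kmpc_atomic_fixed4_add( &loc, gtid, &x, y );
// where 'loc' is a compiler-generated source-location ident_t and 'gtid' is
// the calling thread's global thread id (both are placeholders here).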
443 // 1-byte
444 void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs );
445 void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs );
446 void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs );
447 void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
448 void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs );
449 void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs );
450 void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs );
451 void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs );
452 void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
453 void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs );
454 void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs );
455 // 2-byte
456 void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs );
457 void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs );
458 void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs );
459 void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
460 void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs );
461 void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs );
462 void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs );
463 void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs );
464 void __kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
465 void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs );
466 void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs );
467 // 4-byte add / sub fixed
468 void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
469 void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
470 // 4-byte add / sub float
471 void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
472 void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
473 // 8-byte add / sub fixed
474 void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
475 void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
476 // 8-byte add / sub float
477 void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
478 void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
479 // 4-byte fixed
480 void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
481 void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
482 void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
483 void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
484 void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
485 void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
486 void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
487 void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
488 void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
489 // 8-byte fixed
490 void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
491 void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
492 void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
493 void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
494 void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
495 void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
496 void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
497 void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
498 void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
499 // 4-byte float
500 void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
501 void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
502 // 8-byte float
503 void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
504 void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
505 // 1-, 2-, 4-, 8-byte logical (&&, ||)
506 void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs );
507 void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs );
508 void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs );
509 void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs );
510 void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
511 void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
512 void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
513 void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
514 // MIN / MAX
515 void __kmpc_atomic_fixed1_max( ident_t *id_ref, int gtid, char * lhs, char rhs );
516 void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs );
517 void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs );
518 void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs );
519 void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
520 void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
521 void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
522 void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
523 void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
524 void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
525 void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
526 void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
527 #if KMP_HAVE_QUAD
528 void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
529 void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
530 #if ( KMP_ARCH_X86 )
531  // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only
532  void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
533  void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
534 #endif
535 #endif
536 // .NEQV. (same as xor)
537 void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
538 void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
539 void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
540 void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
541 // .EQV. (same as ~xor)
542 void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
543 void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
544 void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
545 void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
546 // long double type
547 void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
548 void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
549 void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
550 void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
551 // _Quad type
552 #if KMP_HAVE_QUAD
553 void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
554 void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
555 void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
556 void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
557 #if ( KMP_ARCH_X86 )
558  // Routines with 16-byte arguments aligned to 16-byte boundary
559  void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
560  void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
561  void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
562  void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
563 #endif
564 #endif
565 // routines for complex types
566 void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
567 void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
568 void __kmpc_atomic_cmplx4_mul( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
569 void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
570 void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
571 void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
572 void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
573 void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
574 void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
575 void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
576 void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
577 void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
578 #if KMP_HAVE_QUAD
579 void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
580 void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
581 void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
582 void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
583 #if ( KMP_ARCH_X86 )
584  // Routines with 16-byte arguments aligned to 16-byte boundary
585  void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
586  void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
587  void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
588  void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
589 #endif
590 #endif
591 
592 #if OMP_40_ENABLED
593 
594 // OpenMP 4.0: x = expr binop x for non-commutative operations.
595 // Supported only on IA-32 architecture and Intel(R) 64
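// Illustrative lowering (sketch): the _rev entry points take the operands in
// reversed order, e.g.
//     #pragma omp atomic
//     x = y / x;                             // double x; double y;
// may be emitted as
//     __kmpc_atomic_float8_div_rev( &loc, gtid, &x, y );
// with 'loc' and 'gtid' as compiler-supplied placeholders.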
596 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
597 
598 void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
599 void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
600 void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
601 void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
602 void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
603 void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
604 void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
605 void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
606 void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
607 void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
608 void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
609 void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
610 void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
611 void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
612 void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
613 void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
614 void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
615 void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
616 void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
617 void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
618 void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
619 void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
620 void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
621 void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
622 void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
623 void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
624 void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
625 void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
626 void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
627 void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
628 #if KMP_HAVE_QUAD
629 void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
630 void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
631 #endif
632 void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
633 void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
634 void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
635 void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
636 void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
637 void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
638 #if KMP_HAVE_QUAD
639 void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
640 void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
641 #if ( KMP_ARCH_X86 )
642  // Routines with 16-byte arguments aligned to 16-byte boundary
643  void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
644  void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
645  void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
646  void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
647 #endif
648 #endif // KMP_HAVE_QUAD
649 
650 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
651 
652 #endif //OMP_40_ENABLED
653 
654 // routines for mixed types
655 
656 // RHS=float8
657 void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
658 void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
659 void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
660 void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
661 void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
662 void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
663 void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
664 void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
665 void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
666 void __kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
667 void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
668 void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
669 
670 // RHS=float16 (deprecated; to be removed once we are sure the compiler no longer uses these routines)
671 #if KMP_HAVE_QUAD
672 void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
673 void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
674 void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
675 void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
676 void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs );
677 
678 void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
679 void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
680 void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
681 void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
682 void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs );
683 
684 void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
685 void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
686 void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
687 void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
688 void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs );
689 
690 void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
691 void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
692 void __kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
693 void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
694 void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs );
695 
696 void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
697 void __kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
698 void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
699 void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
700 
701 void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
702 void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
703 void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
704 void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
705 
706 void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
707 void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
708 void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
709 void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
710 #endif // KMP_HAVE_QUAD
711 
712 // RHS=cmplx8
713 void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
714 void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
715 void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
716 void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
717 
718 // generic atomic routines
719 void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
720 void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
721 void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
722 void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
723 void __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
724 void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
725 void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
726 void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
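// Sketch of a callback for the generic routines above (illustrative, not part
// of the original header; the out/lhs/rhs argument order is an assumption
// based on how kmp_atomic.c invokes 'f').
static inline void
__kmp_example_add4_callback( void *out, void *lhs, void *rhs )
{
    *(kmp_int32 *) out = *(kmp_int32 *) lhs + *(kmp_int32 *) rhs;
}
// A call site would then look like:
//     __kmpc_atomic_4( &loc, gtid, &x, &y, __kmp_example_add4_callback );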
727 
728 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
729 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
730 
731 //
732 // Below routines for atomic READ are listed
733 //
734 
735 char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc );
736 short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc );
737 kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc );
738 kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc );
739 kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc );
740 kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc );
741 long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc );
742 #if KMP_HAVE_QUAD
743 QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc );
744 #endif
745 // Fix for CQ220361: cmplx4 READ returns void on Windows* OS; the read value is
746 // returned through an additional parameter.
747 #if ( KMP_OS_WINDOWS )
748  void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
749 #else
750  kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
751 #endif
752 kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc );
753 kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc );
754 #if KMP_HAVE_QUAD
755 CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc );
756 #if ( KMP_ARCH_X86 )
757  // Routines with 16-byte arguments aligned to 16-byte boundary
758  Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc );
759  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc );
760 #endif
761 #endif
762 
763 
764 //
765 // Below routines for atomic WRITE are listed
766 //
767 
768 void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs );
769 void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs );
770 void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
771 void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
772 void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
773 void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
774 void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
775 #if KMP_HAVE_QUAD
776 void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
777 #endif
778 void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
779 void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
780 void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
781 #if KMP_HAVE_QUAD
782 void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
783 #if ( KMP_ARCH_X86 )
784  // Routines with 16-byte arguments aligned to 16-byte boundary
785  void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
786  void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
787 #endif
788 #endif
789 
790 //
791 // Below routines for atomic CAPTURE are listed
792 //
793 
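// Illustrative lowering (sketch): for a capture construct such as
//     #pragma omp atomic capture
//     { v = x; x += y; }                     // kmp_int32 v, x, y;
// a compiler may emit
//     v = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &x, y, 0 );
// The trailing 'flag' selects which value is captured; the convention appears
// to be 0 for the value before the update and 1 for the value after it (see
// kmp_atomic.c for the definitive behavior).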
794 // 1-byte
795 char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
796 char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
797 char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
798 unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
799 char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
800 char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
801 char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
802 char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
803 unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
804 char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
805 char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
806 // 2-byte
807 short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
808 short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
809 short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
810 unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
811 short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
812 short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
813 short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
814 short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
815 unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
816 short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
817 short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
818 // 4-byte add / sub fixed
819 kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
820 kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
821 // 4-byte add / sub float
822 kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
823 kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
824 // 8-byte add / sub fixed
825 kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
826 kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
827 // 8-byte add / sub float
828 kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
829 kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
830 // 4-byte fixed
831 kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
832 kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
833 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
834 kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
835 kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
836 kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
837 kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
838 kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
839 kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
840 // 8-byte fixed
841 kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
842 kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
843 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
844 kmp_int64 __kmpc_atomic_fixed8_mul_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
845 kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
846 kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
847 kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
848 kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
849 kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
850 // 4-byte float
851 kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
852 kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
853 // 8-byte float
854 kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
855 kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
856 // 1-, 2-, 4-, 8-byte logical (&&, ||)
857 char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
858 char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
859 short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
860 short __kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
861 kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
862 kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
863 kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
864 kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
865 // MIN / MAX
866 char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
867 char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
868 short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
869 short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
870 kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
871 kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
872 kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
873 kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
874 kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
875 kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
876 kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
877 kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
878 #if KMP_HAVE_QUAD
879 QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
880 QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
881 #endif
882 // .NEQV. (same as xor)
883 char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
884 short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
885 kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
886 kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
887 // .EQV. (same as ~xor)
888 char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
889 short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
890 kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
891 kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
892 // long double type
893 long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
894 long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
895 long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
896 long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
897 #if KMP_HAVE_QUAD
898 // _Quad type
899 QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
900 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
901 QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
902 QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
903 #endif
904 // routines for complex types
905 // Workaround for cmplx4 routines: they return void; the captured value is returned via an output argument
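// Usage sketch ('loc'/'gtid' are placeholders):
//     kmp_cmplx32 v;
//     __kmpc_atomic_cmplx4_add_cpt( &loc, gtid, &x, y, &v, 1 );  // v receives the captured value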
906 void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
907 void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
908 void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
909 void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
910 
911 kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
912 kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
913 kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
914 kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
915 kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
916 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
917 kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
918 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
919 #if KMP_HAVE_QUAD
920 CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
921 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
922 CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
923 CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
924 #if ( KMP_ARCH_X86 )
925  // Routines with 16-byte arguments aligned to 16-byte boundary
926  Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
927  Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
928  Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
929  Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
930  Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
931  Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
932  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
933  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
934  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
935  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
936 #endif
937 #endif
938 
939 void __kmpc_atomic_start(void);
940 void __kmpc_atomic_end(void);
941 
942 #if OMP_40_ENABLED
943 
944 // OpenMP 4.0 capture forms for non-commutative operations:  v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }
945 
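// Illustrative lowering (sketch): for
//     #pragma omp atomic capture
//     { x = y / x; v = x; }                  // double v, x, y;
// a compiler may emit
//     v = __kmpc_atomic_float8_div_cpt_rev( &loc, gtid, &x, y, 1 );
// (same flag convention as the forward capture routines: 1 captures the
// updated value; this is an assumption, see kmp_atomic.c).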
946 char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
947 char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
948 unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
949 char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag);
950 char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
951 unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
952 short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
953 short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
954 unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
955 short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
956 short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
957 unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
958 kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
959 kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
960 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
961 kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
962 kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
963 kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
964 kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
965 kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
966 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
967 kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
968 kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
969 kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
970 float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
971 float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
972 double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
973 double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
974 long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
975 long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
976 #if KMP_HAVE_QUAD
977 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
978 QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
979 #endif
980 // Workaround for cmplx4 routines: they return void; the captured value is returned via an output argument
981 void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
982 void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
983 kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
984 kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
985 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
986 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
987 #if KMP_HAVE_QUAD
988 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
989 CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
990 #if ( KMP_ARCH_X86 )
991  Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
992  Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
993  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
994  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
995 #endif
996 #endif
997 
998 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
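// Usage sketch ('loc'/'gtid' are placeholders):
//     kmp_int32 v = __kmpc_atomic_fixed4_swp( &loc, gtid, &x, y );  // v = old x; x = y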
999 char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs );
1000 short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs );
1001 kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
1002 kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
1003 float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs );
1004 double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs );
1005 long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
1006 #if KMP_HAVE_QUAD
1007 QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
1008 #endif
1009 // !!! TODO: check if we need a workaround here
1010 void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );
1011 //kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
1012 
1013 kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
1014 kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
1015 #if KMP_HAVE_QUAD
1016 CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
1017 #if ( KMP_ARCH_X86 )
1018  Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
1019  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
1020 #endif
1021 #endif
1022 
1023 // End of OpenMP 4.0 capture
1024 
1025 #endif //OMP_40_ENABLED
1026 
1027 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1028 
1029 /* ------------------------------------------------------------------------ */
1030 /* ------------------------------------------------------------------------ */
1031 
1032 #ifdef __cplusplus
1033  } // extern "C"
1034 #endif
1035 
1036 #endif /* KMP_ATOMIC_H */
1037 
1038 // end of file