LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp_atomic.h"
17 #include "kmp.h" // TRUE, asm routines prototypes
18 
19 typedef unsigned char uchar;
20 typedef unsigned short ushort;
21 
551 /*
552  * Global vars
553  */
554 
555 #ifndef KMP_GOMP_COMPAT
556 int __kmp_atomic_mode = 1; // Intel perf
557 #else
558 int __kmp_atomic_mode = 2; // GOMP compatibility
559 #endif /* KMP_GOMP_COMPAT */
560 
561 KMP_ALIGN(128)
562 
563 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
564 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
565 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
566 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
567 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
568 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
569 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
570 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for 8-byte complex (kmp_cmplx32) data type */
571 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
572 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
573 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
574 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
575 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
576 
577 
578 /*
579  2007-03-02:
580  Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
581  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
582  It seems the right solution is to write the OP_CMPXCHG and MIN_MAX_CMPXCHG
583  routines in assembly language.
584 */
585 #define KMP_ATOMIC_VOLATILE volatile
586 
587 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
588 
589  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
590  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
591  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
592  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
593  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
594  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
595 
596  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
597  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
598  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
599  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
600  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
601  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
602 
603  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
604  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
605  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
606  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
607 
608  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
609  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
610  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
611  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
612 
613 #endif
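
// Editor's note (illustrative, not part of the original source): the overloads above
// let the generic critical-section and cmpxchg macros below apply "+=", "-=", "<", etc.
// directly to the aligned wrapper structs; each overload simply forwards to the
// underlying _Quad (or __complex__ _Quad) member q. A minimal usage sketch:
//
//   Quad_a16_t a, b;         // wrappers around a _Quad member named q
//   a += b;                  // calls the overload above; equivalent to a.q += b.q
//   if ( a < b ) { /*...*/ } // comparisons also forward to the q members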
614 
615 /* ------------------------------------------------------------------------ */
616 /* ATOMIC implementation routines */
617 /* one routine for each operation and operand type */
618 /* ------------------------------------------------------------------------ */
619 
620 // All routine declarations look like
621 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
622 // ------------------------------------------------------------------------
623 
624 #define KMP_CHECK_GTID \
625  if ( gtid == KMP_GTID_UNKNOWN ) { \
626  gtid = __kmp_entry_gtid(); \
627  } // check and get gtid when needed
628 
629 // Beginning of a definition (provides name, parameters, debug trace)
630 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed-size integers)
631 // OP_ID - operation identifier (add, sub, mul, ...)
632 // TYPE - operands' type
633 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
634 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
635 { \
636  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
637  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
638 
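// Editor's note -- an illustrative sketch, not part of the original source. For
// ATOMIC_BEGIN(fixed4, add, kmp_int32, void) the preprocessor produces, roughly, the
// opening of the routine; the operation body and closing brace are supplied by the
// macro that instantiates it:
//
//   void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs )
//   {
//       KMP_DEBUG_ASSERT( __kmp_init_serial );
//       KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//       ... // operation-specific code, then the closing "}"
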
639 // ------------------------------------------------------------------------
640 // Lock variables used for critical sections for various size operands
641 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
642 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
643 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
644 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
645 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
646 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
647 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
648 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
649 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
650 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
651 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
652 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
653 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
654 
655 // ------------------------------------------------------------------------
656 // Operation on *lhs, rhs bound by critical section
657 // OP - operator (it's supposed to contain an assignment)
658 // LCK_ID - lock identifier
659 // Note: don't check gtid as it should always be valid
660 // 1- and 2-byte operands: a valid gtid is expected; other sizes must check it before this macro
661 #define OP_CRITICAL(OP,LCK_ID) \
662  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
663  \
664  (*lhs) OP (rhs); \
665  \
666  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
667 
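// Editor's note -- illustrative expansion sketch (not in the original source):
// OP_CRITICAL(+=, 10r) pastes the LCK_ID onto ATOMIC_LOCK and becomes, roughly:
//
//   __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
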
668 // ------------------------------------------------------------------------
669 // For GNU compatibility, we may need to use a critical section,
670 // even though it is not required by the ISA.
671 //
672 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
673 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
674 // critical section. On Intel(R) 64, all atomic operations are done with fetch
675 // and add or compare and exchange. Therefore, the FLAG parameter to this
676 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
677 // require a critical section and which we predict will be implemented in the
678 // GNU codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
679 //
680 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
681 // the FLAG parameter should always be 1. If we know that we will be using
682 // a critical section, then we want to make certain that we use the generic
683 // lock __kmp_atomic_lock to protect the atomic update, and not of of the
684 // locks that are specialized based upon the size or type of the data.
685 //
686 // If FLAG is 0, then we are relying on dead code elimination by the build
687 // compiler to get rid of the useless block of code, and save a needless
688 // branch at runtime.
689 //
690 
691 #ifdef KMP_GOMP_COMPAT
692 # define OP_GOMP_CRITICAL(OP,FLAG) \
693  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
694  KMP_CHECK_GTID; \
695  OP_CRITICAL( OP, 0 ); \
696  return; \
697  }
698 # else
699 # define OP_GOMP_CRITICAL(OP,FLAG)
700 #endif /* KMP_GOMP_COMPAT */
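
// Editor's note -- illustrative sketch, not part of the original source. When built
// with KMP_GOMP_COMPAT, a use such as OP_GOMP_CRITICAL(+=, 1) expands to roughly
//
//   if ( (1) && (__kmp_atomic_mode == 2) ) {   // GOMP compatibility mode requested
//       KMP_CHECK_GTID;                        // obtain gtid if it is unknown
//       OP_CRITICAL( +=, 0 );                  // serialize on the generic __kmp_atomic_lock
//       return;
//   }
//
// so the routine falls back to the generic lock instead of a lock-free update.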
701 
702 #if KMP_MIC
703 # define KMP_DO_PAUSE _mm_delay_32( 1 )
704 #else
705 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
706 #endif /* KMP_MIC */
707 
708 // ------------------------------------------------------------------------
709 // Operation on *lhs, rhs using "compare_and_store" routine
710 // TYPE - operands' type
711 // BITS - size in bits, used to distinguish low level calls
712 // OP - operator
713 #define OP_CMPXCHG(TYPE,BITS,OP) \
714  { \
715  TYPE old_value, new_value; \
716  old_value = *(TYPE volatile *)lhs; \
717  new_value = old_value OP rhs; \
718  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
719  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
720  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
721  { \
722  KMP_DO_PAUSE; \
723  \
724  old_value = *(TYPE volatile *)lhs; \
725  new_value = old_value OP rhs; \
726  } \
727  }
728 
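// Editor's note -- a rough expansion sketch of OP_CMPXCHG(kmp_real32, 32, +), added for
// illustration only; the real expansion depends on how KMP_COMPARE_AND_STORE_ACQ32 and
// VOLATILE_CAST are defined elsewhere in the runtime:
//
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = old_value + rhs;
//   // retry until no other thread has changed *lhs between the read and the CAS
//   while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//               *VOLATILE_CAST(kmp_int32 *) &old_value,
//               *VOLATILE_CAST(kmp_int32 *) &new_value ) )
//   {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + rhs;
//   }
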
729 #if USE_CMPXCHG_FIX
730 // 2007-06-25:
731 // workaround for C78287 (complex(kind=4) data type)
732 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
733 // The compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
734 // This is a compiler problem.
735 // Related tracker is C76005, targeted to 11.0.
736 // I verified the asm of the workaround.
737 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
738  { \
739  struct _sss { \
740  TYPE cmp; \
741  kmp_int##BITS *vvv; \
742  }; \
743  struct _sss old_value, new_value; \
744  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
745  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
746  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
747  new_value.cmp = old_value.cmp OP rhs; \
748  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
749  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
750  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
751  { \
752  KMP_DO_PAUSE; \
753  \
754  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
755  new_value.cmp = old_value.cmp OP rhs; \
756  } \
757  }
758 // end of the first part of the workaround for C78287
759 #endif // USE_CMPXCHG_FIX
760 
761 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
762 
763 // ------------------------------------------------------------------------
764 // X86 or X86_64: no alignment problems ====================================
765 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
766 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
767  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
768  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
769  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
770 }
771 // -------------------------------------------------------------------------
772 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
773 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
774  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
775  OP_CMPXCHG(TYPE,BITS,OP) \
776 }
777 #if USE_CMPXCHG_FIX
778 // -------------------------------------------------------------------------
779 // workaround for C78287 (complex(kind=4) data type)
780 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
781 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
782  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
783  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
784 }
785 // end of the second part of the workaround for C78287
786 #endif
787 
788 #else
789 // -------------------------------------------------------------------------
790 // Code for other architectures that don't handle unaligned accesses.
791 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
792 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
793  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
794  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
795  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
796  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
797  } else { \
798  KMP_CHECK_GTID; \
799  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
800  } \
801 }
802 // -------------------------------------------------------------------------
803 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
804 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
805  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
806  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
807  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
808  } else { \
809  KMP_CHECK_GTID; \
810  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
811  } \
812 }
813 #if USE_CMPXCHG_FIX
814 // -------------------------------------------------------------------------
815 // workaround for C78287 (complex(kind=4) data type)
816 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
817 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
818  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
819  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
820  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
821  } else { \
822  KMP_CHECK_GTID; \
823  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
824  } \
825 }
826 // end of the second part of the workaround for C78287
827 #endif // USE_CMPXCHG_FIX
828 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
829 
830 // Routines for ATOMIC 4-byte operands addition and subtraction
831 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
832 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
833 
834 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
835 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
836 
837 // Routines for ATOMIC 8-byte operands addition and subtraction
838 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
839 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
840 
841 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
842 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
843 
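// Editor's note -- usage sketch, illustrative only; the exact calling convention is
// generated by the OpenMP compiler, not written by hand. For user code such as
//
//   int x;
//   #pragma omp atomic
//   x += 5;
//
// a compiler targeting this runtime would typically emit a call like
//
//   __kmpc_atomic_fixed4_add( &loc, gtid, &x, 5 );
//
// where loc is a compiler-generated ident_t describing the source location and gtid is
// the calling thread's global thread id.
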
844 // ------------------------------------------------------------------------
845 // Entry definitions for integer operands
846 // TYPE_ID - operands type and size (fixed4, float4)
847 // OP_ID - operation identifier (add, sub, mul, ...)
848 // TYPE - operand type
849 // BITS - size in bits, used to distinguish low level calls
850 // OP - operator (used in critical section)
851 // LCK_ID - lock identifier, used to possibly distinguish lock variable
852 // MASK - used for alignment check
853 
854 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
855 // ------------------------------------------------------------------------
856 // Routines for ATOMIC integer operands, other operators
857 // ------------------------------------------------------------------------
858 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
859 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
860 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
861 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
862 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
863 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
864 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
865 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
866 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
867 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
868 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
869 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
870 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
871 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
872 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
873 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
874 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
875 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
876 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
877 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
878 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
879 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
880 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
881 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
882 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
883 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
884 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
885 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
886 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
887 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
888 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
889 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
890 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
891 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
892 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
893 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
894 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
895 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
896 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
897 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
898 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
899 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
900 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
901 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
902 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
903 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
904 
905 
906 /* ------------------------------------------------------------------------ */
907 /* Routines for C/C++ Reduction operators && and || */
908 /* ------------------------------------------------------------------------ */
909 
910 // ------------------------------------------------------------------------
911 // Separate macros are needed for && and || because they have no compound-assignment form
912 // TODO: eliminate the ATOMIC_CRIT_{L,EQV} macros, as they are not used
913 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
914 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
915  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
916  OP_CRITICAL( = *lhs OP, LCK_ID ) \
917 }
918 
919 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
920 
921 // ------------------------------------------------------------------------
922 // X86 or X86_64: no alignment problems ===================================
923 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
924 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
925  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
926  OP_CMPXCHG(TYPE,BITS,OP) \
927 }
928 
929 #else
930 // ------------------------------------------------------------------------
931 // Code for other architectures that don't handle unaligned accesses.
932 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
933 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
934  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
935  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
936  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
937  } else { \
938  KMP_CHECK_GTID; \
939  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
940  } \
941 }
942 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
943 
944 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
945 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
946 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
947 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
948 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
949 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
950 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
951 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
952 
953 
954 /* ------------------------------------------------------------------------- */
955 /* Routines for Fortran operators that have no C counterpart: */
956 /* MAX, MIN, .EQV., .NEQV. */
957 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
958 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
959 /* ------------------------------------------------------------------------- */
960 
961 // -------------------------------------------------------------------------
962 // MIN and MAX need separate macros
963 // OP - operator used to check whether any action is still needed
964 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
965  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
966  \
967  if ( *lhs OP rhs ) { /* still need actions? */ \
968  *lhs = rhs; \
969  } \
970  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
971 
972 // -------------------------------------------------------------------------
973 #ifdef KMP_GOMP_COMPAT
974 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
975  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
976  KMP_CHECK_GTID; \
977  MIN_MAX_CRITSECT( OP, 0 ); \
978  return; \
979  }
980 #else
981 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
982 #endif /* KMP_GOMP_COMPAT */
983 
984 // -------------------------------------------------------------------------
985 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
986  { \
987  TYPE KMP_ATOMIC_VOLATILE temp_val; \
988  TYPE old_value; \
989  temp_val = *lhs; \
990  old_value = temp_val; \
991  while ( old_value OP rhs && /* still need actions? */ \
992  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
993  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
994  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
995  { \
996  KMP_CPU_PAUSE(); \
997  temp_val = *lhs; \
998  old_value = temp_val; \
999  } \
1000  }
1001 
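// Editor's note -- illustrative sketch, not part of the original source. For the fixed4
// max routine below, MIN_MAX_CMPXCHG(kmp_int32, 32, <) expands to roughly
//
//   kmp_int32 volatile temp_val;
//   kmp_int32 old_value;
//   temp_val = *lhs;
//   old_value = temp_val;
//   while ( old_value < rhs &&                       // update still needed?
//           ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//                 *VOLATILE_CAST(kmp_int32 *) &old_value,
//                 *VOLATILE_CAST(kmp_int32 *) &rhs ) )
//   {
//       KMP_CPU_PAUSE();
//       temp_val = *lhs;
//       old_value = temp_val;
//   }
//
// i.e. the loop exits as soon as *lhs is already >= rhs or the CAS installs rhs.
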
1002 // -------------------------------------------------------------------------
1003 // 1-byte, 2-byte operands - use critical section
1004 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1005 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1006  if ( *lhs OP rhs ) { /* need actions? */ \
1007  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1008  MIN_MAX_CRITSECT(OP,LCK_ID) \
1009  } \
1010 }
1011 
1012 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1013 
1014 // -------------------------------------------------------------------------
1015 // X86 or X86_64: no alignment problems ====================================
1016 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1017 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1018  if ( *lhs OP rhs ) { \
1019  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1020  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1021  } \
1022 }
1023 
1024 #else
1025 // -------------------------------------------------------------------------
1026 // Code for other architectures that don't handle unaligned accesses.
1027 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1028 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1029  if ( *lhs OP rhs ) { \
1030  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1031  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1032  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1033  } else { \
1034  KMP_CHECK_GTID; \
1035  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1036  } \
1037  } \
1038 }
1039 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1040 
1041 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1042 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1043 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1044 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1045 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1046 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1047 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1048 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1049 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1050 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1051 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1052 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1053 #if KMP_HAVE_QUAD
1054 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1055 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1056 #if ( KMP_ARCH_X86 )
1057  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1058  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1059 #endif
1060 #endif
1061 // ------------------------------------------------------------------------
1062 // .EQV. needs separate macros because the bitwise complement (~) is required
1063 // OP is ignored for the critical-section path; ^=~ is used instead
1064 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1065 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1066  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1067  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1068 }
1069 
1070 // ------------------------------------------------------------------------
1071 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1072 // ------------------------------------------------------------------------
1073 // X86 or X86_64: no alignment problems ===================================
1074 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1075 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1076  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1077  OP_CMPXCHG(TYPE,BITS,OP) \
1078 }
1079 // ------------------------------------------------------------------------
1080 #else
1081 // ------------------------------------------------------------------------
1082 // Code for other architectures that don't handle unaligned accesses.
1083 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1084 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1085  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1086  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1087  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1088  } else { \
1089  KMP_CHECK_GTID; \
1090  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1091  } \
1092 }
1093 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1094 
1095 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1096 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1097 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1098 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1099 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1100 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1101 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1102 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
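
// Editor's note -- illustrative clarification, not part of the original source: for the
// eqv entries above the OP token is "^~", so inside OP_CMPXCHG the update computed is
//
//   new_value = old_value ^~ rhs;      // parsed as old_value ^ (~rhs)
//
// which is the bitwise .EQV. (complement of exclusive or), while the critical-section
// path uses the "^=~" form, i.e. *lhs ^= ~rhs.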
1103 
1104 
1105 // ------------------------------------------------------------------------
1106 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1107 // TYPE_ID, OP_ID, TYPE - detailed above
1108 // OP - operator
1109 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1110 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1111 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1112  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1113  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1114 }
1115 
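// Editor's note -- an illustrative sketch, not part of the original source, of the whole
// routine generated by ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) below:
//
//   void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs )
//   {
//       KMP_DEBUG_ASSERT( __kmp_init_serial );
//       KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
//       // GOMP-compat fallback here when KMP_GOMP_COMPAT is defined and mode == 2
//       __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//       (*lhs) += (rhs);
//       __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//   }
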
1116 /* ------------------------------------------------------------------------- */
1117 // routines for long double type
1118 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1119 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1120 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1121 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1122 #if KMP_HAVE_QUAD
1123 // routines for _Quad type
1124 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1125 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1126 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1127 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1128 #if ( KMP_ARCH_X86 )
1129  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1130  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1131  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1132  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1133 #endif
1134 #endif
1135 // routines for complex types
1136 
1137 #if USE_CMPXCHG_FIX
1138 // workaround for C78287 (complex(kind=4) data type)
1139 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1140 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1141 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1142 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1143 // end of the workaround for C78287
1144 #else
1145 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1146 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1147 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1148 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1149 #endif // USE_CMPXCHG_FIX
1150 
1151 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1152 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1153 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1154 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1155 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1156 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1157 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1158 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1159 #if KMP_HAVE_QUAD
1160 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1161 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1162 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1163 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1164 #if ( KMP_ARCH_X86 )
1165  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1166  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1167  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1168  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1169 #endif
1170 #endif
1171 
1172 #if OMP_40_ENABLED
1173 
1174 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1175 // Supported only on IA-32 architecture and Intel(R) 64
1176 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1177 
1178 // ------------------------------------------------------------------------
1179 // Operation on *lhs, rhs bound by critical section
1180 // OP - operator (it's supposed to contain an assignment)
1181 // LCK_ID - lock identifier
1182 // Note: don't check gtid as it should always be valid
1183 // 1- and 2-byte operands: a valid gtid is expected; other sizes must check it before this macro
1184 #define OP_CRITICAL_REV(OP,LCK_ID) \
1185  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1186  \
1187  (*lhs) = (rhs) OP (*lhs); \
1188  \
1189  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1190 
1191 #ifdef KMP_GOMP_COMPAT
1192 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1193  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1194  KMP_CHECK_GTID; \
1195  OP_CRITICAL_REV( OP, 0 ); \
1196  return; \
1197  }
1198 #else
1199 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1200 #endif /* KMP_GOMP_COMPAT */
1201 
1202 
1203 // Beginning of a definition (provides name, parameters, debug trace)
1204 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1205 // OP_ID - operation identifier (add, sub, mul, ...)
1206 // TYPE - operands' type
1207 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1208 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1209 { \
1210  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1211  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1212 
1213 // ------------------------------------------------------------------------
1214 // Operation on *lhs, rhs using "compare_and_store" routine
1215 // TYPE - operands' type
1216 // BITS - size in bits, used to distinguish low level calls
1217 // OP - operator
1218 // Note: temp_val is introduced to force the compiler to read
1219 // *lhs only once (without it the compiler reads *lhs twice)
1220 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1221  { \
1222  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1223  TYPE old_value, new_value; \
1224  temp_val = *lhs; \
1225  old_value = temp_val; \
1226  new_value = rhs OP old_value; \
1227  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1228  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1229  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1230  { \
1231  KMP_DO_PAUSE; \
1232  \
1233  temp_val = *lhs; \
1234  old_value = temp_val; \
1235  new_value = rhs OP old_value; \
1236  } \
1237  }
1238 
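// Editor's note -- illustrative clarification, not part of the original source: the only
// difference from OP_CMPXCHG is the operand order. For __kmpc_atomic_fixed4_div_rev the
// update recomputed on each retry of the compare-and-store loop is
//
//   new_value = rhs / old_value;      // x = expr / x  rather than  x = x / expr
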
1239 // -------------------------------------------------------------------------
1240 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1241 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1242  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1243  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1244 }
1245 
1246 // ------------------------------------------------------------------------
1247 // Entry definitions for integer operands
1248 // TYPE_ID - operands type and size (fixed4, float4)
1249 // OP_ID - operation identifier (add, sub, mul, ...)
1250 // TYPE - operand type
1251 // BITS - size in bits, used to distinguish low level calls
1252 // OP - operator (used in critical section)
1253 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1254 
1255 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1256 // ------------------------------------------------------------------------
1257 // Routines for ATOMIC integer operands, other operators
1258 // ------------------------------------------------------------------------
1259 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1260 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1261 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1262 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1263 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1264 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1265 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1266 
1267 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1268 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1269 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1270 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1271 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1272 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1273 
1274 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1275 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1276 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1277 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1278 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1279 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1280 
1281 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1282 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1283 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1284 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1285 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1286 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1287 
1288 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1289 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1290 
1291 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1292 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1293 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1294 
1295 // ------------------------------------------------------------------------
1296 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1297 // TYPE_ID, OP_ID, TYPE - detailed above
1298 // OP - operator
1299 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1300 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1301 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1302  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1303  OP_CRITICAL_REV(OP,LCK_ID) \
1304 }
1305 
1306 /* ------------------------------------------------------------------------- */
1307 // routines for long double type
1308 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1309 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1310 #if KMP_HAVE_QUAD
1311 // routines for _Quad type
1312 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1313 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1314 #if ( KMP_ARCH_X86 )
1315  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1316  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1317 #endif
1318 #endif
1319 
1320 // routines for complex types
1321 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1322 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1323 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1324 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1325 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1326 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1327 #if KMP_HAVE_QUAD
1328 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1329 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1330 #if ( KMP_ARCH_X86 )
1331  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1332  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1333 #endif
1334 #endif
1335 
1336 
1337 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1338 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1339 
1340 #endif //OMP_40_ENABLED
1341 
1342 
1343 /* ------------------------------------------------------------------------ */
1344 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1345 /* Note: to reduce the total number of type combinations, it is assumed */
1346 /* that the compiler converts RHS to the longest floating-point type, */
1347 /* that is _Quad, before calling any of these routines. */
1348 /* The conversion to _Quad is done by the compiler during the calculation, */
1349 /* and the conversion back to TYPE before the assignment, like: */
1350 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1351 /* A performance penalty is expected because of software emulation. */
1352 /* ------------------------------------------------------------------------ */
1353 
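// Editor's note -- illustrative sketch, not part of the original source: a mixed-type
// entry such as __kmpc_atomic_fixed4_mul_float8 (generated below) has the signature
//
//   void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
//
// and the update inside OP_CMPXCHG is new_value = old_value * rhs, so the usual C
// conversions promote *lhs to kmp_real64 for the multiply and convert the result back
// to kmp_int32 on the store, matching the *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) pattern
// described above (with kmp_real64 in place of _Quad for the float8 entries).
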
1354 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1355 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1356 { \
1357  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1358  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1359 
1360 // -------------------------------------------------------------------------
1361 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1362 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1363  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1364  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1365 }
1366 
1367 // -------------------------------------------------------------------------
1368 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1369 // -------------------------------------------------------------------------
1370 // X86 or X86_64: no alignment problems ====================================
1371 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1372 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1373  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1374  OP_CMPXCHG(TYPE,BITS,OP) \
1375 }
1376 // -------------------------------------------------------------------------
1377 #else
1378 // ------------------------------------------------------------------------
1379 // Code for other architectures that don't handle unaligned accesses.
1380 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1381 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1382  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1383  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1384  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1385  } else { \
1386  KMP_CHECK_GTID; \
1387  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1388  } \
1389 }
1390 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1391 
1392 // -------------------------------------------------------------------------
1393 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1394 // -------------------------------------------------------------------------
1395 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1396 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1397  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1398  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1399 }
1400 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1401 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1402  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1403  OP_CRITICAL_REV(OP,LCK_ID) \
1404 }
1405 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1406 
1407 // RHS=float8
1408 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1409 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1410 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1411 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1412 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1413 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1414 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1415 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1416 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1417 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1418 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1419 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1420 
1421 // RHS=float16 (deprecated; to be removed once we are sure the compiler no longer uses these entries)
1422 #if KMP_HAVE_QUAD
1423 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1424 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_fp
1425 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1426 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_fp
1427 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1428 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_fp
1429 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1430 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1431 
1432 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1433 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_fp
1434 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1435 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_fp
1436 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1437 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_fp
1438 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1439 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1440 
1441 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1442 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_add_fp
1443 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1444 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_fp
1445 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1446 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_mul_fp
1447 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1448 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1449 
1450 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1451 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_fp
1452 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1453 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_fp
1454 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1455 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_fp
1456 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1457 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1458 
1459 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1460 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1461 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1462 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1463 
1464 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1465 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1466 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1467 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1468 
1469 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1470 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1471 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1472 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1473 
1474 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1475 // Reverse operations
1476 ATOMIC_CMPXCHG_REV_MIX( fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev_fp
1477 ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_rev_fp
1478 ATOMIC_CMPXCHG_REV_MIX( fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev_fp
1479 ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev_fp
1480 
1481 ATOMIC_CMPXCHG_REV_MIX( fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev_fp
1482 ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_rev_fp
1483 ATOMIC_CMPXCHG_REV_MIX( fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev_fp
1484 ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev_fp
1485 
1486 ATOMIC_CMPXCHG_REV_MIX( fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_rev_fp
1487 ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_rev_fp
1488 ATOMIC_CMPXCHG_REV_MIX( fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_rev_fp
1489 ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_rev_fp
1490 
1491 ATOMIC_CMPXCHG_REV_MIX( fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev_fp
1492 ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_rev_fp
1493 ATOMIC_CMPXCHG_REV_MIX( fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev_fp
1494 ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev_fp
1495 
1496 ATOMIC_CMPXCHG_REV_MIX( float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev_fp
1497 ATOMIC_CMPXCHG_REV_MIX( float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev_fp
1498 
1499 ATOMIC_CMPXCHG_REV_MIX( float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev_fp
1500 ATOMIC_CMPXCHG_REV_MIX( float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev_fp
1501 
1502 ATOMIC_CRITICAL_REV_FP( float10, long double, sub_rev, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_rev_fp
1503 ATOMIC_CRITICAL_REV_FP( float10, long double, div_rev, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_rev_fp
1504 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1505 
1506 #endif
1507 
1508 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1509 // ------------------------------------------------------------------------
1510 // X86 or X86_64: no alignment problems ====================================
1511 #if USE_CMPXCHG_FIX
1512 // workaround for C78287 (complex(kind=4) data type)
1513 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1514 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1515  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1516  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1517 }
1518 // end of the second part of the workaround for C78287
1519 #else
1520 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1521 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1522  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1523  OP_CMPXCHG(TYPE,BITS,OP) \
1524 }
1525 #endif // USE_CMPXCHG_FIX
1526 #else
1527 // ------------------------------------------------------------------------
1528 // Code for other architectures that don't handle unaligned accesses.
1529 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1530 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1531  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1532  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1533  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1534  } else { \
1535  KMP_CHECK_GTID; \
1536  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1537  } \
1538 }
1539 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
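// [Illustrative sketch only -- not part of the runtime; the function name is
// hypothetical.] The MASK test in the non-x86 variant above gates the
// lock-free path on alignment: with MASK == 7 only 8-byte aligned addresses
// take the cmpxchg route, any other address falls back to the critical
// section selected by LCK_ID.
static inline int example_is_8_byte_aligned( const void *p )
{
    return ( (kmp_uintptr_t) p & 0x7 ) == 0;   // same test as "& 0x##MASK" with MASK == 7
}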
1540 
1541 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1542 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1543 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1544 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1545 
1546 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1547 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1548 
1550 // ------------------------------------------------------------------------
1551 // Atomic READ routines
1552 // ------------------------------------------------------------------------
1553 
1554 // ------------------------------------------------------------------------
1555 // Beginning of a definition (provides name, parameters, debug trace)
1556 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned fixed)
1557 // OP_ID - operation identifier (add, sub, mul, ...)
1558 // TYPE - operands' type
1559 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1560 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1561 { \
1562  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1563  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1564 
1565 // ------------------------------------------------------------------------
1566 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1567 // TYPE - operands' type
1568 // BITS - size in bits, used to distinguish low level calls
1569 // OP - operator
1570 // Note: temp_val introduced in order to force the compiler to read
1571 // *lhs only once (w/o it the compiler reads *lhs twice)
1572 // TODO: check if it is still necessary
1573 // Return old value regardless of the result of the "compare & swap" operation
1574 
1575 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1576  { \
1577  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1578  union f_i_union { \
1579  TYPE f_val; \
1580  kmp_int##BITS i_val; \
1581  }; \
1582  union f_i_union old_value; \
1583  temp_val = *loc; \
1584  old_value.f_val = temp_val; \
1585  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1586  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1587  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1588  new_value = old_value.f_val; \
1589  return new_value; \
1590  }
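// [Illustrative sketch only -- not part of the runtime; names are
// hypothetical.] Instantiated for a 4-byte float, the macro above amounts to
// snapshotting the location through a float/int union and issuing a
// compare-and-store whose "expected" and "desired" values are identical, so
// memory is never modified but the value actually observed by the atomic
// instruction is returned. Roughly, using the GCC/Clang __sync builtin:
static inline kmp_real32 example_atomic_float4_read( volatile kmp_real32 * loc )
{
    union { kmp_real32 f; kmp_int32 i; } old_value;
    old_value.f = *loc;                               // tentative (possibly stale) read
    old_value.i = __sync_val_compare_and_swap( (volatile kmp_int32 *) loc,
                                               old_value.i, old_value.i );
    return old_value.f;                               // value seen by the atomic operation
}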
1591 
1592 // -------------------------------------------------------------------------
1593 // Operation on *lhs, rhs bound by critical section
1594 // OP - operator (it's supposed to contain an assignment)
1595 // LCK_ID - lock identifier
1596 // Note: don't check gtid as it should always be valid
1597 // 1, 2-byte - expect valid parameter, other - check before this macro
1598 #define OP_CRITICAL_READ(OP,LCK_ID) \
1599  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1600  \
1601  new_value = (*loc); \
1602  \
1603  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1604 
1605 // -------------------------------------------------------------------------
1606 #ifdef KMP_GOMP_COMPAT
1607 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1608  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1609  KMP_CHECK_GTID; \
1610  OP_CRITICAL_READ( OP, 0 ); \
1611  return new_value; \
1612  }
1613 #else
1614 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1615 #endif /* KMP_GOMP_COMPAT */
1616 
1617 // -------------------------------------------------------------------------
1618 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1619 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1620  TYPE new_value; \
1621  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1622  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1623  return new_value; \
1624 }
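// [Illustrative sketch only -- not part of the runtime; the name is
// hypothetical.] For the fixed-size integer flavours the atomic read above is
// simply an atomic fetch-and-add of zero, which returns the current contents
// without changing them:
static inline kmp_int32 example_atomic_fixed4_read( volatile kmp_int32 * loc )
{
    return __sync_fetch_and_add( loc, 0 );   // atomic load expressed as "+ 0"
}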
1625 // -------------------------------------------------------------------------
1626 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1627 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1628  TYPE new_value; \
1629  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1630  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1631 }
1632 // ------------------------------------------------------------------------
1633 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1634 // TYPE_ID, OP_ID, TYPE - detailed above
1635 // OP - operator
1636 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1637 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1638 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1639  TYPE new_value; \
1640  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1641  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1642  return new_value; \
1643 }
1644 
1645 // ------------------------------------------------------------------------
1646 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1647 // Let's return the read value through the additional parameter.
1648 
1649 #if ( KMP_OS_WINDOWS )
1650 
1651 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1652  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1653  \
1654  (*out) = (*loc); \
1655  \
1656  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1657 // ------------------------------------------------------------------------
1658 #ifdef KMP_GOMP_COMPAT
1659 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1660  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1661  KMP_CHECK_GTID; \
1662  OP_CRITICAL_READ_WRK( OP, 0 ); \
1663  }
1664 #else
1665 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1666 #endif /* KMP_GOMP_COMPAT */
1667 // ------------------------------------------------------------------------
1668 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1669 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1670 { \
1671  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1672  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1673 
1674 // ------------------------------------------------------------------------
1675 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1676 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1677  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1678  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1679 }
1680 
1681 #endif // KMP_OS_WINDOWS
1682 
1683 // ------------------------------------------------------------------------
1684 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1685 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1686 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1687 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1688 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1689 
1690 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1691 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1692 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1693 
1694 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1695 #if KMP_HAVE_QUAD
1696 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1697 #endif // KMP_HAVE_QUAD
1698 
1699 // Fix for CQ220361 on Windows* OS
1700 #if ( KMP_OS_WINDOWS )
1701  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1702 #else
1703  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1704 #endif
1705 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1706 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1707 #if KMP_HAVE_QUAD
1708 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1709 #if ( KMP_ARCH_X86 )
1710  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1711  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1712 #endif
1713 #endif
1714 
1715 
1716 // ------------------------------------------------------------------------
1717 // Atomic WRITE routines
1718 // ------------------------------------------------------------------------
1719 
1720 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1721 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1722  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1723  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1724 }
1725 // ------------------------------------------------------------------------
1726 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1727 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1728  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1729  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1730 }
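// [Illustrative usage sketch only -- not part of the runtime.] User code does
// not call the generated __kmpc_atomic_*_wr entries directly; an OpenMP
// compiler targeting this library may lower an atomic write to one of them,
// e.g. for a 4-byte float (the exact lowering is compiler dependent):
void example_atomic_write_usage( float * x, float expr )
{
    #pragma omp atomic write
    *x = expr;   // may become a call such as __kmpc_atomic_float4_wr( ..., x, expr )
}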
1731 
1732 
1733 // ------------------------------------------------------------------------
1734 // Operation on *lhs, rhs using "compare_and_store" routine
1735 // TYPE - operands' type
1736 // BITS - size in bits, used to distinguish low level calls
1737 // OP - operator
1738 // Note: temp_val introduced in order to force the compiler to read
1739 // *lhs only once (w/o it the compiler reads *lhs twice)
1740 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1741  { \
1742  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1743  TYPE old_value, new_value; \
1744  temp_val = *lhs; \
1745  old_value = temp_val; \
1746  new_value = rhs; \
1747  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1748  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1749  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1750  { \
1751  KMP_CPU_PAUSE(); \
1752  \
1753  temp_val = *lhs; \
1754  old_value = temp_val; \
1755  new_value = rhs; \
1756  } \
1757  }
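// [Illustrative sketch only -- not part of the runtime; names are
// hypothetical.] The macro above is the write path used where a plain
// exchange of the required width is not available (e.g. 8-byte stores on
// IA-32): the new value is installed with a compare-and-store retry loop.
// Stripped of the type punning, the loop is essentially:
static inline void example_cmpxchg_write8( volatile kmp_int64 * lhs, kmp_int64 rhs )
{
    kmp_int64 old_value = *lhs;
    while ( ! __sync_bool_compare_and_swap( lhs, old_value, rhs ) ) {
        old_value = *lhs;                    // another thread intervened; re-read and retry
    }
}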
1758 
1759 // -------------------------------------------------------------------------
1760 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1761 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1762  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1763  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1764 }
1765 
1766 // ------------------------------------------------------------------------
1767 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1768 // TYPE_ID, OP_ID, TYPE - detailed above
1769 // OP - operator
1770 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1771 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1772 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1773  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1774  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1775 }
1776 // -------------------------------------------------------------------------
1777 
1778 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1779 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1780 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1781 #if ( KMP_ARCH_X86 )
1782  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1783 #else
1784  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1785 #endif
1786 
1787 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1788 #if ( KMP_ARCH_X86 )
1789  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1790 #else
1791  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1792 #endif
1793 
1794 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1795 #if KMP_HAVE_QUAD
1796 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1797 #endif
1798 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1799 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1800 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1801 #if KMP_HAVE_QUAD
1802 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1803 #if ( KMP_ARCH_X86 )
1804  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1805  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1806 #endif
1807 #endif
1808 
1809 
1810 // ------------------------------------------------------------------------
1811 // Atomic CAPTURE routines
1812 // ------------------------------------------------------------------------
1813 
1814 // Beginning of a definition (provides name, parameters, debug trace)
1815 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned fixed)
1816 // OP_ID - operation identifier (add, sub, mul, ...)
1817 // TYPE - operands' type
1818 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1819 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1820 { \
1821  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1822  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1823 
1824 // -------------------------------------------------------------------------
1825 // Operation on *lhs, rhs bound by critical section
1826 // OP - operator (it's supposed to contain an assignment)
1827 // LCK_ID - lock identifier
1828 // Note: don't check gtid as it should always be valid
1829 // 1, 2-byte - expect valid parameter, other - check before this macro
1830 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1831  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1832  \
1833  if( flag ) { \
1834  (*lhs) OP rhs; \
1835  new_value = (*lhs); \
1836  } else { \
1837  new_value = (*lhs); \
1838  (*lhs) OP rhs; \
1839  } \
1840  \
1841  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1842  return new_value;
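// [Illustrative usage sketch only -- not part of the runtime.] The 'flag'
// parameter selects which value the capture returns: the macro above updates
// first and returns the new value when flag is nonzero, and captures the old
// value before updating otherwise. This mirrors the two OpenMP capture forms;
// the lowering indicated in the comments is what a compiler targeting this
// runtime would be expected to produce, but it is compiler dependent:
void example_capture_forms( int * x, int expr, int * v_new, int * v_old )
{
    #pragma omp atomic capture
    *v_new = *x += expr;             // capture the updated value   -> flag != 0

    #pragma omp atomic capture
    { *v_old = *x; *x += expr; }     // capture the previous value  -> flag == 0
}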
1843 
1844 // ------------------------------------------------------------------------
1845 #ifdef KMP_GOMP_COMPAT
1846 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1847  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1848  KMP_CHECK_GTID; \
1849  OP_CRITICAL_CPT( OP##=, 0 ); \
1850  }
1851 #else
1852 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1853 #endif /* KMP_GOMP_COMPAT */
1854 
1855 // ------------------------------------------------------------------------
1856 // Operation on *lhs, rhs using "compare_and_store" routine
1857 // TYPE - operands' type
1858 // BITS - size in bits, used to distinguish low level calls
1859 // OP - operator
1860 // Note: temp_val introduced in order to force the compiler to read
1861 // *lhs only once (w/o it the compiler reads *lhs twice)
1862 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1863  { \
1864  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1865  TYPE old_value, new_value; \
1866  temp_val = *lhs; \
1867  old_value = temp_val; \
1868  new_value = old_value OP rhs; \
1869  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1870  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1871  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1872  { \
1873  KMP_CPU_PAUSE(); \
1874  \
1875  temp_val = *lhs; \
1876  old_value = temp_val; \
1877  new_value = old_value OP rhs; \
1878  } \
1879  if( flag ) { \
1880  return new_value; \
1881  } else \
1882  return old_value; \
1883  }
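// [Illustrative sketch only -- not part of the runtime; names are
// hypothetical.] Instantiated for an 8-byte floating point multiply, the
// macro above is the classic "compute, compare-and-store, retry on failure"
// loop, with the float value punned through a 64-bit integer so the hardware
// cmpxchg can be used (the KMP_CPU_PAUSE back-off is omitted here):
static inline kmp_real64 example_float8_mul_cpt( volatile kmp_real64 * lhs,
                                                 kmp_real64 rhs, int flag )
{
    union { kmp_real64 f; kmp_int64 i; } old_value, new_value;
    do {
        old_value.f = *lhs;                           // snapshot the current value
        new_value.f = old_value.f * rhs;              // compute the updated value
    } while ( ! __sync_bool_compare_and_swap( (volatile kmp_int64 *) lhs,
                                              old_value.i, new_value.i ) );
    return flag ? new_value.f : old_value.f;          // captured value depends on 'flag'
}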
1884 
1885 // -------------------------------------------------------------------------
1886 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1887 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1888  TYPE new_value; \
1889  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1890  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1891 }
1892 
1893 // -------------------------------------------------------------------------
1894 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1895 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1896  TYPE old_value, new_value; \
1897  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1898  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1899  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1900  if( flag ) { \
1901  return old_value OP rhs; \
1902  } else \
1903  return old_value; \
1904 }
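// [Illustrative sketch only -- not part of the runtime; the name is
// hypothetical.] For integer add and sub the capture above avoids the
// cmpxchg loop entirely: one atomic fetch-and-add returns the old value, and
// the new value is reconstructed arithmetically when the caller asked for it:
static inline kmp_int32 example_fixed4_add_cpt( volatile kmp_int32 * lhs,
                                                kmp_int32 rhs, int flag )
{
    kmp_int32 old_value = __sync_fetch_and_add( lhs, rhs );   // atomic *lhs += rhs
    return flag ? old_value + rhs : old_value;                // new vs. old value
}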
1905 // -------------------------------------------------------------------------
1906 
1907 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1908 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1909 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1910 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1911 
1912 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1913 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1914 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1915 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1916 
1917 // ------------------------------------------------------------------------
1918 // Entries definition for integer operands
1919 // TYPE_ID - operands type and size (fixed4, float4)
1920 // OP_ID - operation identifier (add, sub, mul, ...)
1921 // TYPE - operand type
1922 // BITS - size in bits, used to distinguish low level calls
1923 // OP - operator (used in critical section)
1924 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1925 // ------------------------------------------------------------------------
1926 // Routines for ATOMIC integer operands, other operators
1927 // ------------------------------------------------------------------------
1928 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1929 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1930 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1931 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1932 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1933 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1934 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1935 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1936 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1937 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1938 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1939 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1940 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1941 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1942 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1943 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1944 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1945 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1946 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1947 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1948 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1949 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1950 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1951 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1952 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1953 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1954 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1955 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1956 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1957 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1958 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1959 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1960 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1961 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1962 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1963 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1964 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1965 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1966 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1967 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1968 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1969 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1970 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1971 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1972 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1973 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1974 
1976 
1977 // CAPTURE routines for mixed types RHS=float16
1978 #if KMP_HAVE_QUAD
1979 
1980 // Beginning of a definition (provides name, parameters, debug trace)
1981 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned fixed)
1982 // OP_ID - operation identifier (add, sub, mul, ...)
1983 // TYPE - operands' type
1984 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \
1985 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs, int flag ) \
1986 { \
1987  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1988  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1989 
1990 // -------------------------------------------------------------------------
1991 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1992 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \
1993  TYPE new_value; \
1994  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1995  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1996 }
1997 
1998 // -------------------------------------------------------------------------
1999 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
2000 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \
2001  TYPE new_value; \
2002  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2003  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2004 }
2005 
2006 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt_fp
2007 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_cpt_fp
2008 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_fp
2009 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_fp
2010 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt_fp
2011 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_cpt_fp
2012 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_fp
2013 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_fp
2014 
2015 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt_fp
2016 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_cpt_fp
2017 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_fp
2018 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_fp
2019 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt_fp
2020 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_cpt_fp
2021 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_fp
2022 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_fp
2023 
2024 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_cpt_fp
2025 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_add_cpt_fp
2026 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_cpt_fp
2027 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_cpt_fp
2028 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_cpt_fp
2029 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_mul_cpt_fp
2030 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_cpt_fp
2031 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_cpt_fp
2032 
2033 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt_fp
2034 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_cpt_fp
2035 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_fp
2036 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_fp
2037 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt_fp
2038 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_cpt_fp
2039 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_fp
2040 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_fp
2041 
2042 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt_fp
2043 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_fp
2044 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt_fp
2045 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_fp
2046 
2047 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt_fp
2048 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_fp
2049 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt_fp
2050 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_fp
2051 
2052 ATOMIC_CRITICAL_CPT_MIX( float10, long double, add_cpt, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_cpt_fp
2053 ATOMIC_CRITICAL_CPT_MIX( float10, long double, sub_cpt, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_fp
2054 ATOMIC_CRITICAL_CPT_MIX( float10, long double, mul_cpt, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt_fp
2055 ATOMIC_CRITICAL_CPT_MIX( float10, long double, div_cpt, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_fp
2056 
2057 #endif //KMP_HAVE_QUAD
2058 
2060 
2061 // ------------------------------------------------------------------------
2062 // Routines for C/C++ Reduction operators && and ||
2063 // ------------------------------------------------------------------------
2064 
2065 // -------------------------------------------------------------------------
2066 // Operation on *lhs, rhs bound by critical section
2067 // OP - operator (it's supposed to contain an assignment)
2068 // LCK_ID - lock identifier
2069 // Note: don't check gtid as it should always be valid
2070 // 1, 2-byte - expect valid parameter, other - check before this macro
2071 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
2072  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2073  \
2074  if( flag ) { \
2075  new_value OP rhs; \
2076  } else \
2077  new_value = (*lhs); \
2078  \
2079  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
2080 
2081 // ------------------------------------------------------------------------
2082 #ifdef KMP_GOMP_COMPAT
2083 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
2084  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2085  KMP_CHECK_GTID; \
2086  OP_CRITICAL_L_CPT( OP, 0 ); \
2087  return new_value; \
2088  }
2089 #else
2090 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
2091 #endif /* KMP_GOMP_COMPAT */
2092 
2093 // ------------------------------------------------------------------------
2094 // Need separate macros for &&, || because there is no combined assignment
2095 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2096 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2097  TYPE new_value; \
2098  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
2099  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2100 }
2101 
2102 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
2103 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
2104 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
2105 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
2106 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
2107 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
2108 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
2109 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
2110 
2111 
2112 // -------------------------------------------------------------------------
2113 // Routines for Fortran operators that have no direct counterpart in C:
2114 // MAX, MIN, .EQV., .NEQV.
2115 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2116 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2117 // -------------------------------------------------------------------------
2118 
2119 // -------------------------------------------------------------------------
2120 // MIN and MAX need separate macros
2121 // OP - operator used to check whether any action is still needed
2122 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2123  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2124  \
2125  if ( *lhs OP rhs ) { /* still need actions? */ \
2126  old_value = *lhs; \
2127  *lhs = rhs; \
2128  if ( flag ) \
2129  new_value = rhs; \
2130  else \
2131  new_value = old_value; \
2132  } \
2133  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2134  return new_value; \
2135 
2136 // -------------------------------------------------------------------------
2137 #ifdef KMP_GOMP_COMPAT
2138 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
2139  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2140  KMP_CHECK_GTID; \
2141  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
2142  }
2143 #else
2144 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2145 #endif /* KMP_GOMP_COMPAT */
2146 
2147 // -------------------------------------------------------------------------
2148 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2149  { \
2150  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2151  /*TYPE old_value; */ \
2152  temp_val = *lhs; \
2153  old_value = temp_val; \
2154  while ( old_value OP rhs && /* still need actions? */ \
2155  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2156  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2157  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2158  { \
2159  KMP_CPU_PAUSE(); \
2160  temp_val = *lhs; \
2161  old_value = temp_val; \
2162  } \
2163  if( flag ) \
2164  return rhs; \
2165  else \
2166  return old_value; \
2167  }
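// [Illustrative sketch only -- not part of the runtime; the name is
// hypothetical.] The min/max loop above retries only while the value in
// memory still "loses" against rhs; once another thread has stored something
// at least as good, no write is performed at all. For a 4-byte signed max
// (OP is '<'), the body of the macro corresponds to:
static inline kmp_int32 example_fixed4_max_cpt_body( volatile kmp_int32 * lhs,
                                                     kmp_int32 rhs, int flag )
{
    kmp_int32 old_value = *lhs;
    while ( old_value < rhs &&                                   // still need to update?
            ! __sync_bool_compare_and_swap( lhs, old_value, rhs ) ) {
        old_value = *lhs;                                        // lost the race; re-read
    }
    return flag ? rhs : old_value;
}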
2168 
2169 // -------------------------------------------------------------------------
2170 // 1-byte, 2-byte operands - use critical section
2171 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2172 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2173  TYPE new_value, old_value; \
2174  if ( *lhs OP rhs ) { /* need actions? */ \
2175  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2176  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2177  } \
2178  return *lhs; \
2179 }
2180 
2181 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2182 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2183  TYPE new_value, old_value; \
2184  if ( *lhs OP rhs ) { \
2185  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2186  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2187  } \
2188  return *lhs; \
2189 }
2190 
2191 
2192 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2193 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2194 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2195 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2196 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2197 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2198 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2199 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2200 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2201 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2202 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2203 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2204 #if KMP_HAVE_QUAD
2205 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2206 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2207 #if ( KMP_ARCH_X86 )
2208  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2209  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2210 #endif
2211 #endif
2212 
2213 // ------------------------------------------------------------------------
2214 #ifdef KMP_GOMP_COMPAT
2215 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2216  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2217  KMP_CHECK_GTID; \
2218  OP_CRITICAL_CPT( OP, 0 ); \
2219  }
2220 #else
2221 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2222 #endif /* KMP_GOMP_COMPAT */
2223 // ------------------------------------------------------------------------
2224 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2225 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2226  TYPE new_value; \
2227  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2228  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2229 }
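// [Illustrative note -- not part of the runtime; the name is hypothetical.]
// Fortran .EQV. is the bitwise complement of .NEQV. (exclusive or), which is
// why the eqv_cpt entries below reuse the XOR cmpxchg path with the token
// "^~" spliced in as the operator:
static inline kmp_int8 example_bitwise_eqv( kmp_int8 a, kmp_int8 b )
{
    return (kmp_int8)( a ^ ~b );     // a .EQV. b  ==  a ^ ~b  ==  ~(a ^ b)
}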
2230 
2231 // ------------------------------------------------------------------------
2232 
2233 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2234 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2235 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2236 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2237 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2238 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2239 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2240 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2241 
2242 // ------------------------------------------------------------------------
2243 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2244 // TYPE_ID, OP_ID, TYPE - detailed above
2245 // OP - operator
2246 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2247 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2248 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2249  TYPE new_value; \
2250  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2251  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2252 }
2253 
2254 // ------------------------------------------------------------------------
2255 
2256 // Workaround for cmplx4. Regular routines with return value don't work
2257 // on Win_32e. Let's return captured values through the additional parameter.
2258 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2259  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2260  \
2261  if( flag ) { \
2262  (*lhs) OP rhs; \
2263  (*out) = (*lhs); \
2264  } else { \
2265  (*out) = (*lhs); \
2266  (*lhs) OP rhs; \
2267  } \
2268  \
2269  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2270  return;
2271 // ------------------------------------------------------------------------
2272 
2273 #ifdef KMP_GOMP_COMPAT
2274 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2275  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2276  KMP_CHECK_GTID; \
2277  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2278  }
2279 #else
2280 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2281 #endif /* KMP_GOMP_COMPAT */
2282 // ------------------------------------------------------------------------
2283 
2284 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2285 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2286 { \
2287  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2288  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2289 // ------------------------------------------------------------------------
2290 
2291 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2292 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2293  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2294  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2295 }
2296 // The end of workaround for cmplx4
2297 
2298 /* ------------------------------------------------------------------------- */
2299 // routines for long double type
2300 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2301 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2302 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2303 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2304 #if KMP_HAVE_QUAD
2305 // routines for _Quad type
2306 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2307 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2308 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2309 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2310 #if ( KMP_ARCH_X86 )
2311  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2312  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2313  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2314  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2315 #endif
2316 #endif
2317 
2318 // routines for complex types
2319 
2320 // cmplx4 routines to return void
2321 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2322 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2323 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2324 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2325 
2326 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2327 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2328 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2329 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2330 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2331 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2332 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2333 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2334 #if KMP_HAVE_QUAD
2335 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2336 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2337 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2338 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2339 #if ( KMP_ARCH_X86 )
2340  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2341  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2342  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2343  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2344 #endif
2345 #endif
2346 
2347 #if OMP_40_ENABLED
2348 
2349 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
2350 // Supported only on IA-32 architecture and Intel(R) 64
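// [Illustrative usage sketch only -- not part of the runtime.] In user code
// the reverse captured forms listed above look as follows; the lowering to
// the *_cpt_rev entries generated below is compiler dependent:
void example_capture_rev_forms( double * x, double expr, double * v_old, double * v_new )
{
    #pragma omp atomic capture
    { *v_old = *x; *x = expr - *x; }   // v = x; x = expr binop x;  -> capture old value

    #pragma omp atomic capture
    { *x = expr / *x; *v_new = *x; }   // x = expr binop x; v = x;  -> capture new value
}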
2351 
2352 // -------------------------------------------------------------------------
2353 // Operation on *lhs, rhs bound by critical section
2354 // OP - operator (it's supposed to contain an assignment)
2355 // LCK_ID - lock identifier
2356 // Note: don't check gtid as it should always be valid
2357 // 1, 2-byte - expect valid parameter, other - check before this macro
2358 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2359  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2360  \
2361  if( flag ) { \
2362  /*temp_val = (*lhs);*/\
2363  (*lhs) = (rhs) OP (*lhs); \
2364  new_value = (*lhs); \
2365  } else { \
2366  new_value = (*lhs);\
2367  (*lhs) = (rhs) OP (*lhs); \
2368  } \
2369  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2370  return new_value;
2371 
2372 // ------------------------------------------------------------------------
2373 #ifdef KMP_GOMP_COMPAT
2374 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2375  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2376  KMP_CHECK_GTID; \
2377  OP_CRITICAL_CPT_REV( OP, 0 ); \
2378  }
2379 #else
2380 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2381 #endif /* KMP_GOMP_COMPAT */
2382 
2383 // ------------------------------------------------------------------------
2384 // Operation on *lhs, rhs using "compare_and_store" routine
2385 // TYPE - operands' type
2386 // BITS - size in bits, used to distinguish low level calls
2387 // OP - operator
2388 // Note: temp_val introduced in order to force the compiler to read
2389 // *lhs only once (w/o it the compiler reads *lhs twice)
2390 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2391  { \
2392  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2393  TYPE old_value, new_value; \
2394  temp_val = *lhs; \
2395  old_value = temp_val; \
2396  new_value = rhs OP old_value; \
2397  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2398  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2399  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2400  { \
2401  KMP_CPU_PAUSE(); \
2402  \
2403  temp_val = *lhs; \
2404  old_value = temp_val; \
2405  new_value = rhs OP old_value; \
2406  } \
2407  if( flag ) { \
2408  return new_value; \
2409  } else \
2410  return old_value; \
2411  }
2412 
2413 // -------------------------------------------------------------------------
2414 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2415 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2416  TYPE new_value; \
2417  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2418  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2419  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2420 }
2421 
2422 
2423 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2424 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2425 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2426 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2427 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2428 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2429 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2430 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2431 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2432 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2433 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2434 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2435 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2436 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2437 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2438 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2439 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2440 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2441 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2442 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2443 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2444 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2445 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2446 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2447 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2448 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2449 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2450 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2451 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2452 
2453 
2454 // ------------------------------------------------------------------------
2455 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2456 // TYPE_ID, OP_ID, TYPE - detailed above
2457 // OP - operator
2458 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2459 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2460 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2461  TYPE new_value; \
2462  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2463  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2464  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2465  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2466 }
2467 
2468 
2469 /* ------------------------------------------------------------------------- */
2470 // routines for long double type
2471 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2472 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2473 #if KMP_HAVE_QUAD
2474 // routines for _Quad type
2475 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2476 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2477 #if ( KMP_ARCH_X86 )
2478  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2479  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2480 #endif
2481 #endif
2482 
2483 // routines for complex types
2484 
2485 // ------------------------------------------------------------------------
2486 
2487 // Workaround for cmplx4. Regular routines with return value don't work
2488 // on Win_32e. Let's return captured values through the additional parameter.
2489 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2490  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2491  \
2492  if( flag ) { \
2493  (*lhs) = (rhs) OP (*lhs); \
2494  (*out) = (*lhs); \
2495  } else { \
2496  (*out) = (*lhs); \
2497  (*lhs) = (rhs) OP (*lhs); \
2498  } \
2499  \
2500  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2501  return;
2502 // ------------------------------------------------------------------------
2503 
2504 #ifdef KMP_GOMP_COMPAT
2505 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2506  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2507  KMP_CHECK_GTID; \
2508  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2509  }
2510 #else
2511 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2512 #endif /* KMP_GOMP_COMPAT */
2513 // ------------------------------------------------------------------------
2514 
2515 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2516 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2517  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2518  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2519 }
2520 // The end of workaround for cmplx4
2521 
2522 
2523 // !!! TODO: check if we need to return void for cmplx4 routines
2524 // cmplx4 routines to return void
2525 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2526 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
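// Illustrative sketch only (assumed signature, not generated code): the cmplx4
// workaround routines return the captured value through an out parameter
// instead of a return value, approximately
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid,
//                                          kmp_cmplx32 * lhs, kmp_cmplx32 rhs,
//                                          kmp_cmplx32 * out, int flag );
//
// With flag != 0, *out receives the new value (rhs - *lhs); otherwise *out
// receives the value *lhs held before the update (see OP_CRITICAL_CPT_REV_WRK).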
2527 
2528 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2529 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2530 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2531 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2532 #if KMP_HAVE_QUAD
2533 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2534 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2535 #if ( KMP_ARCH_X86 )
2536  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2537  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2538 #endif
2539 #endif
2540 
2541 // Capture reverse for mixed type: RHS=float16
2542 #if KMP_HAVE_QUAD
2543 
2544 // Beginning of a definition (provides name, parameters, debug trace)
2545 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned fixed)
2546 // OP_ID - operation identifier (add, sub, mul, ...)
2547 // TYPE - operands' type
2548 // -------------------------------------------------------------------------
2549 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
2550 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \
2551  TYPE new_value; \
2552  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2553  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2554 }
2555 
2556 // -------------------------------------------------------------------------
2557 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
2558 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \
2559  TYPE new_value; \
2560  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) /* send assignment */ \
2561  OP_CRITICAL_CPT_REV(OP,LCK_ID) /* send assignment */ \
2562 }
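// Illustrative sketch only (assumed signature, not generated code): the mixed
// routines take the LHS type as the operand/result type and _Quad as the RHS
// type, approximately
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp( ident_t *id_ref, int gtid,
//                                             char * lhs, _Quad rhs, int flag );
//
// i.e. roughly *lhs = (char)( rhs - *lhs ), with the old or new value captured
// according to flag, as in the other capture-reverse routines.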
2563 
2564 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
2565 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
2566 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev_fp
2567 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
2568 
2569 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
2570 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
2571 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev_fp
2572 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
2573 
2574 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
2575 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
2576 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_cpt_rev_fp
2577 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
2578 
2579 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
2580 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
2581 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev_fp
2582 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
2583 
2584 ATOMIC_CMPXCHG_CPT_REV_MIX( float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev_fp
2585 ATOMIC_CMPXCHG_CPT_REV_MIX( float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev_fp
2586 
2587 ATOMIC_CMPXCHG_CPT_REV_MIX( float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev_fp
2588 ATOMIC_CMPXCHG_CPT_REV_MIX( float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev_fp
2589 
2590 ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, sub_cpt_rev, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev_fp
2591 ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, div_cpt_rev, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev_fp
2592 
2593 #endif //KMP_HAVE_QUAD
2594 
2595 
2596 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2597 
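// Illustrative lowering only: for a 4-byte integer x, a compiler may translate
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
// into a call along the lines of
//
//   v = __kmpc_atomic_fixed4_swp( &loc, gtid, &x, expr );
//
// where loc and gtid stand for the usual ident_t source location and global
// thread id arguments; the exact lowering is a compiler decision.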
2598 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2599 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2600 { \
2601  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2602  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2603 
2604 #define CRITICAL_SWP(LCK_ID) \
2605  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2606  \
2607  old_value = (*lhs); \
2608  (*lhs) = rhs; \
2609  \
2610  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2611  return old_value;
2612 
2613 // ------------------------------------------------------------------------
2614 #ifdef KMP_GOMP_COMPAT
2615 #define GOMP_CRITICAL_SWP(FLAG) \
2616  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2617  KMP_CHECK_GTID; \
2618  CRITICAL_SWP( 0 ); \
2619  }
2620 #else
2621 #define GOMP_CRITICAL_SWP(FLAG)
2622 #endif /* KMP_GOMP_COMPAT */
2623 
2624 
2625 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2626 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2627  TYPE old_value; \
2628  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2629  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2630  return old_value; \
2631 }
2632 // ------------------------------------------------------------------------
2633 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2634 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2635  TYPE old_value; \
2636  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2637  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2638  return old_value; \
2639 }
2640 
2641 // ------------------------------------------------------------------------
2642 #define CMPXCHG_SWP(TYPE,BITS) \
2643  { \
2644  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2645  TYPE old_value, new_value; \
2646  temp_val = *lhs; \
2647  old_value = temp_val; \
2648  new_value = rhs; \
2649  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2650  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2651  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2652  { \
2653  KMP_CPU_PAUSE(); \
2654  \
2655  temp_val = *lhs; \
2656  old_value = temp_val; \
2657  new_value = rhs; \
2658  } \
2659  return old_value; \
2660  }
2661 
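// The CMPXCHG_SWP loop above implements an atomic exchange in terms of
// compare-and-swap: re-read *lhs, try to install the bit pattern of rhs, and
// retry (with a pause) until no other thread modified *lhs in between.
// Roughly equivalent sketch using C11 atomics (illustration only, not used here):
//
//   T old = atomic_load( lhs );
//   while ( !atomic_compare_exchange_weak( lhs, &old, rhs ) )
//       ;   // a failed CAS refreshes old with the current *lhs
//   return old;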
2662 // -------------------------------------------------------------------------
2663 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2664 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2665  TYPE old_value; \
2666  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2667  CMPXCHG_SWP(TYPE,BITS) \
2668 }
2669 
2670 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2671 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2672 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2673 
2674 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2675 
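// On 32-bit x86 there is no native 8-byte atomic exchange instruction (XCHG is
// limited to the register width), so the 8-byte swaps presumably fall back to
// the cmpxchg-based loop there; other targets use a plain atomic exchange.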
2676 #if ( KMP_ARCH_X86 )
2677  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2678  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2679 #else
2680  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2681  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2682 #endif
2683 
2684 // ------------------------------------------------------------------------
2685 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2686 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2687 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2688  TYPE old_value; \
2689  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2690  CRITICAL_SWP(LCK_ID) \
2691 }
2692 
2693 // ------------------------------------------------------------------------
2694 
2695 // !!! TODO: check if we need to return void for cmplx4 routines
2696 // Workaround for cmplx4. Regular routines with a return value don't work
2697 // on Win_32e, so the captured value is returned through an additional parameter.
2698 
2699 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2700 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2701 { \
2702  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2703  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2704 
2705 
2706 #define CRITICAL_SWP_WRK(LCK_ID) \
2707  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2708  \
2709  tmp = (*lhs); \
2710  (*lhs) = (rhs); \
2711  (*out) = tmp; \
2712  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2713  return;
2714 
2715 // ------------------------------------------------------------------------
2716 
2717 #ifdef KMP_GOMP_COMPAT
2718 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2719  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2720  KMP_CHECK_GTID; \
2721  CRITICAL_SWP_WRK( 0 ); \
2722  }
2723 #else
2724 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2725 #endif /* KMP_GOMP_COMPAT */
2726 // ------------------------------------------------------------------------
2727 
2728 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2729 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2730  TYPE tmp; \
2731  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2732  CRITICAL_SWP_WRK(LCK_ID) \
2733 }
2734 // The end of workaround for cmplx4
2735 
2736 
2737 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2738 #if KMP_HAVE_QUAD
2739 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2740 #endif
2741 // cmplx4 routine to return void
2742 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2743 
2744 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2745 
2746 
2747 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2748 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2749 #if KMP_HAVE_QUAD
2750 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2751 #if ( KMP_ARCH_X86 )
2752  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2753  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2754 #endif
2755 #endif
2756 
2757 
2758 // End of OpenMP 4.0 Capture
2759 
2760 #endif //OMP_40_ENABLED
2761 
2762 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2763 
2764 
2765 #undef OP_CRITICAL
2766 
2767 /* ------------------------------------------------------------------------ */
2768 /* Generic atomic routines */
2769 /* ------------------------------------------------------------------------ */
2770 
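/*
    Illustrative usage only (the names below are hypothetical, not part of this
    library): a compiler can handle an atomic update of a type with no
    specialized routine by passing a combining callback, e.g. for a 4-byte int:

        static void add_int32( void *out, void *a, void *b )
        {   // *out = *a + *b, all operands kmp_int32
            *(kmp_int32 *)out = *(kmp_int32 *)a + *(kmp_int32 *)b;
        }

        // x += incr, performed atomically; loc/gtid are the usual
        // ident_t location and global thread id arguments
        __kmpc_atomic_4( &loc, gtid, &x, &incr, add_int32 );
*/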
2771 void
2772 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2773 {
2774  KMP_DEBUG_ASSERT( __kmp_init_serial );
2775 
2776  if (
2777 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2778  FALSE /* must use lock */
2779 #else
2780  TRUE
2781 #endif
2782  )
2783  {
2784  kmp_int8 old_value, new_value;
2785 
2786  old_value = *(kmp_int8 *) lhs;
2787  (*f)( &new_value, &old_value, rhs );
2788 
2789  /* TODO: Should this be acquire or release? */
2790  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2791  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2792  {
2793  KMP_CPU_PAUSE();
2794 
2795  old_value = *(kmp_int8 *) lhs;
2796  (*f)( &new_value, &old_value, rhs );
2797  }
2798 
2799  return;
2800  }
2801  else {
2802  //
2803  // All 1-byte data is of integer data type.
2804  //
2805 
2806 #ifdef KMP_GOMP_COMPAT
2807  if ( __kmp_atomic_mode == 2 ) {
2808  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2809  }
2810  else
2811 #endif /* KMP_GOMP_COMPAT */
2812  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2813 
2814  (*f)( lhs, lhs, rhs );
2815 
2816 #ifdef KMP_GOMP_COMPAT
2817  if ( __kmp_atomic_mode == 2 ) {
2818  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2819  }
2820  else
2821 #endif /* KMP_GOMP_COMPAT */
2822  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2823  }
2824 }
2825 
2826 void
2827 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2828 {
2829  if (
2830 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2831  FALSE /* must use lock */
2832 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2833  TRUE /* no alignment problems */
2834 #else
2835  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2836 #endif
2837  )
2838  {
2839  kmp_int16 old_value, new_value;
2840 
2841  old_value = *(kmp_int16 *) lhs;
2842  (*f)( &new_value, &old_value, rhs );
2843 
2844  /* TODO: Should this be acquire or release? */
2845  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2846  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2847  {
2848  KMP_CPU_PAUSE();
2849 
2850  old_value = *(kmp_int16 *) lhs;
2851  (*f)( &new_value, &old_value, rhs );
2852  }
2853 
2854  return;
2855  }
2856  else {
2857  //
2858  // All 2-byte data is of integer data type.
2859  //
2860 
2861 #ifdef KMP_GOMP_COMPAT
2862  if ( __kmp_atomic_mode == 2 ) {
2863  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2864  }
2865  else
2866 #endif /* KMP_GOMP_COMPAT */
2867  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2868 
2869  (*f)( lhs, lhs, rhs );
2870 
2871 #ifdef KMP_GOMP_COMPAT
2872  if ( __kmp_atomic_mode == 2 ) {
2873  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2874  }
2875  else
2876 #endif /* KMP_GOMP_COMPAT */
2877  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2878  }
2879 }
2880 
2881 void
2882 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2883 {
2884  KMP_DEBUG_ASSERT( __kmp_init_serial );
2885 
2886  if (
2887  //
2888  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2889  // GOMP compatibility is broken if this routine is called for floats.
2890  //
2891 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2892  TRUE /* no alignment problems */
2893 #else
2894  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2895 #endif
2896  )
2897  {
2898  kmp_int32 old_value, new_value;
2899 
2900  old_value = *(kmp_int32 *) lhs;
2901  (*f)( &new_value, &old_value, rhs );
2902 
2903  /* TODO: Should this be acquire or release? */
2904  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2905  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2906  {
2907  KMP_CPU_PAUSE();
2908 
2909  old_value = *(kmp_int32 *) lhs;
2910  (*f)( &new_value, &old_value, rhs );
2911  }
2912 
2913  return;
2914  }
2915  else {
2916  //
2917  // Use __kmp_atomic_lock_4i for all 4-byte data,
2918  // even if it isn't of integer data type.
2919  //
2920 
2921 #ifdef KMP_GOMP_COMPAT
2922  if ( __kmp_atomic_mode == 2 ) {
2923  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2924  }
2925  else
2926 #endif /* KMP_GOMP_COMPAT */
2927  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2928 
2929  (*f)( lhs, lhs, rhs );
2930 
2931 #ifdef KMP_GOMP_COMPAT
2932  if ( __kmp_atomic_mode == 2 ) {
2933  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2934  }
2935  else
2936 #endif /* KMP_GOMP_COMPAT */
2937  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2938  }
2939 }
2940 
2941 void
2942 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2943 {
2944  KMP_DEBUG_ASSERT( __kmp_init_serial );
2945  if (
2946 
2947 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2948  FALSE /* must use lock */
2949 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2950  TRUE /* no alignment problems */
2951 #else
2952  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2953 #endif
2954  )
2955  {
2956  kmp_int64 old_value, new_value;
2957 
2958  old_value = *(kmp_int64 *) lhs;
2959  (*f)( &new_value, &old_value, rhs );
2960  /* TODO: Should this be acquire or release? */
2961  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2962  *(kmp_int64 *) &old_value,
2963  *(kmp_int64 *) &new_value ) )
2964  {
2965  KMP_CPU_PAUSE();
2966 
2967  old_value = *(kmp_int64 *) lhs;
2968  (*f)( &new_value, &old_value, rhs );
2969  }
2970 
2971  return;
2972  } else {
2973  //
2974  // Use __kmp_atomic_lock_8i for all 8-byte data,
2975  // even if it isn't of integer data type.
2976  //
2977 
2978 #ifdef KMP_GOMP_COMPAT
2979  if ( __kmp_atomic_mode == 2 ) {
2980  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2981  }
2982  else
2983 #endif /* KMP_GOMP_COMPAT */
2984  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2985 
2986  (*f)( lhs, lhs, rhs );
2987 
2988 #ifdef KMP_GOMP_COMPAT
2989  if ( __kmp_atomic_mode == 2 ) {
2990  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2991  }
2992  else
2993 #endif /* KMP_GOMP_COMPAT */
2994  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2995  }
2996 }
2997 
2998 void
2999 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3000 {
3001  KMP_DEBUG_ASSERT( __kmp_init_serial );
3002 
3003 #ifdef KMP_GOMP_COMPAT
3004  if ( __kmp_atomic_mode == 2 ) {
3005  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3006  }
3007  else
3008 #endif /* KMP_GOMP_COMPAT */
3009  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
3010 
3011  (*f)( lhs, lhs, rhs );
3012 
3013 #ifdef KMP_GOMP_COMPAT
3014  if ( __kmp_atomic_mode == 2 ) {
3015  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3016  }
3017  else
3018 #endif /* KMP_GOMP_COMPAT */
3019  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
3020 }
3021 
3022 void
3023 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3024 {
3025  KMP_DEBUG_ASSERT( __kmp_init_serial );
3026 
3027 #ifdef KMP_GOMP_COMPAT
3028  if ( __kmp_atomic_mode == 2 ) {
3029  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3030  }
3031  else
3032 #endif /* KMP_GOMP_COMPAT */
3033  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
3034 
3035  (*f)( lhs, lhs, rhs );
3036 
3037 #ifdef KMP_GOMP_COMPAT
3038  if ( __kmp_atomic_mode == 2 ) {
3039  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3040  }
3041  else
3042 #endif /* KMP_GOMP_COMPAT */
3043  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
3044 }
3045 
3046 void
3047 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3048 {
3049  KMP_DEBUG_ASSERT( __kmp_init_serial );
3050 
3051 #ifdef KMP_GOMP_COMPAT
3052  if ( __kmp_atomic_mode == 2 ) {
3053  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3054  }
3055  else
3056 #endif /* KMP_GOMP_COMPAT */
3057  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
3058 
3059  (*f)( lhs, lhs, rhs );
3060 
3061 #ifdef KMP_GOMP_COMPAT
3062  if ( __kmp_atomic_mode == 2 ) {
3063  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3064  }
3065  else
3066 #endif /* KMP_GOMP_COMPAT */
3067  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
3068 }
3069 
3070 void
3071 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3072 {
3073  KMP_DEBUG_ASSERT( __kmp_init_serial );
3074 
3075 #ifdef KMP_GOMP_COMPAT
3076  if ( __kmp_atomic_mode == 2 ) {
3077  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3078  }
3079  else
3080 #endif /* KMP_GOMP_COMPAT */
3081  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
3082 
3083  (*f)( lhs, lhs, rhs );
3084 
3085 #ifdef KMP_GOMP_COMPAT
3086  if ( __kmp_atomic_mode == 2 ) {
3087  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3088  }
3089  else
3090 #endif /* KMP_GOMP_COMPAT */
3091  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
3092 }
3093 
3094 // AC: same two routines as GOMP_atomic_start/end, but they will be called by our compiler;
3095 // duplicated in order to avoid using third-party names in pure Intel code.
3096 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3097 void
3098 __kmpc_atomic_start(void)
3099 {
3100  int gtid = __kmp_entry_gtid();
3101  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3102  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3103 }
3104 
3105 
3106 void
3107 __kmpc_atomic_end(void)
3108 {
3109  int gtid = __kmp_get_gtid();
3110  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3111  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3112 }
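// Illustrative usage only: a compiler may bracket an atomic update that has no
// specialized routine with these calls, e.g.
//
//   __kmpc_atomic_start();
//   x = x / expr;      // arbitrary update, protected by __kmp_atomic_lock
//   __kmpc_atomic_end();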
3113 
3114 /* ------------------------------------------------------------------------ */
3115 /* ------------------------------------------------------------------------ */
3120 // end of file