1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "kmp_atomic.h"
15 #include "kmp.h" // TRUE, asm routines prototypes
16 
17 typedef unsigned char uchar;
18 typedef unsigned short ushort;
19 
562 /*
563  * Global vars
564  */
565 
566 #ifndef KMP_GOMP_COMPAT
567 int __kmp_atomic_mode = 1; // Intel perf
568 #else
569 int __kmp_atomic_mode = 2; // GOMP compatibility
570 #endif /* KMP_GOMP_COMPAT */
571 
572 KMP_ALIGN(128)
573 
574 // Control access to all user coded atomics in Gnu compat mode
575 kmp_atomic_lock_t __kmp_atomic_lock;
576 // Control access to all user coded atomics for 1-byte fixed data types
577 kmp_atomic_lock_t __kmp_atomic_lock_1i;
578 // Control access to all user coded atomics for 2-byte fixed data types
579 kmp_atomic_lock_t __kmp_atomic_lock_2i;
580 // Control access to all user coded atomics for 4-byte fixed data types
581 kmp_atomic_lock_t __kmp_atomic_lock_4i;
582 // Control access to all user coded atomics for kmp_real32 data type
583 kmp_atomic_lock_t __kmp_atomic_lock_4r;
584 // Control access to all user coded atomics for 8-byte fixed data types
585 kmp_atomic_lock_t __kmp_atomic_lock_8i;
586 // Control access to all user coded atomics for kmp_real64 data type
587 kmp_atomic_lock_t __kmp_atomic_lock_8r;
588 // Control access to all user coded atomics for complex byte data type
589 kmp_atomic_lock_t __kmp_atomic_lock_8c;
590 // Control access to all user coded atomics for long double data type
591 kmp_atomic_lock_t __kmp_atomic_lock_10r;
592 // Control access to all user coded atomics for _Quad data type
593 kmp_atomic_lock_t __kmp_atomic_lock_16r;
594 // Control access to all user coded atomics for double complex data type
595 kmp_atomic_lock_t __kmp_atomic_lock_16c;
596 // Control access to all user coded atomics for long double complex type
597 kmp_atomic_lock_t __kmp_atomic_lock_20c;
598 // Control access to all user coded atomics for _Quad complex data type
599 kmp_atomic_lock_t __kmp_atomic_lock_32c;
600 
601 /* 2007-03-02:
602  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
603  on *_32 and *_32e. This is just a temporary workaround for the problem. It
604  seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
605  in assembler language. */
606 #define KMP_ATOMIC_VOLATILE volatile
607 
608 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
609 
610 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
611  lhs.q += rhs.q;
612 }
613 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
614  lhs.q -= rhs.q;
615 }
616 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
617  lhs.q *= rhs.q;
618 }
619 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
620  lhs.q /= rhs.q;
621 }
622 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
623  return lhs.q < rhs.q;
624 }
625 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
626  return lhs.q > rhs.q;
627 }
628 
629 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
630  lhs.q += rhs.q;
631 }
632 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
633  lhs.q -= rhs.q;
634 }
635 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
636  lhs.q *= rhs.q;
637 }
638 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
639  lhs.q /= rhs.q;
640 }
641 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
642  return lhs.q < rhs.q;
643 }
644 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
645  return lhs.q > rhs.q;
646 }
647 
648 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
649  lhs.q += rhs.q;
650 }
651 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
652  lhs.q -= rhs.q;
653 }
654 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
655  lhs.q *= rhs.q;
656 }
657 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
658  lhs.q /= rhs.q;
659 }
660 
661 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
662  kmp_cmplx128_a16_t &rhs) {
663  lhs.q += rhs.q;
664 }
665 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
666  kmp_cmplx128_a16_t &rhs) {
667  lhs.q -= rhs.q;
668 }
669 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
670  kmp_cmplx128_a16_t &rhs) {
671  lhs.q *= rhs.q;
672 }
673 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
674  kmp_cmplx128_a16_t &rhs) {
675  lhs.q /= rhs.q;
676 }
677 
678 #endif
679 
680 // ATOMIC implementation routines -----------------------------------------
681 // One routine for each operation and operand type.
682 // All routine declarations look like
683 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
684 
685 #define KMP_CHECK_GTID \
686  if (gtid == KMP_GTID_UNKNOWN) { \
687  gtid = __kmp_entry_gtid(); \
688  } // check and get gtid when needed
689 
690 // Beginning of a definition (provides name, parameters, debug trace)
691 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
692 // fixed types)
693 // OP_ID - operation identifier (add, sub, mul, ...)
694 // TYPE - operands' type
695 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
696  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
697  TYPE *lhs, TYPE rhs) { \
698  KMP_DEBUG_ASSERT(__kmp_init_serial); \
699  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
700 
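// For reference, an approximate expansion of one instantiation (the closing
// brace of the function body is supplied by the macro that uses ATOMIC_BEGIN):
//
//   ATOMIC_BEGIN(float8, add, kmp_real64, void) ==>
//
//   void __kmpc_atomic_float8_add(ident_t *id_ref, int gtid,
//                                 kmp_real64 *lhs, kmp_real64 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_float8_add: T#%d\n", gtid));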
701 // ------------------------------------------------------------------------
702 // Lock variables used for critical sections for various size operands
703 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
704 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
705 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
706 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
707 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
708 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
709 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
710 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
711 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
712 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
713 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
714 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
715 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
716 
717 // ------------------------------------------------------------------------
718 // Operation on *lhs, rhs bound by critical section
719 // OP - operator (it's supposed to contain an assignment)
720 // LCK_ID - lock identifier
721 // Note: don't check gtid as it should always be valid
722 // 1- and 2-byte operands: gtid expected valid; others: check it before this macro
723 #define OP_CRITICAL(OP, LCK_ID) \
724  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
725  \
726  (*lhs) OP(rhs); \
727  \
728  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
729 
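// For example, OP_CRITICAL(+=, 10r) expands (approximately) to:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);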
730 // ------------------------------------------------------------------------
731 // For GNU compatibility, we may need to use a critical section,
732 // even though it is not required by the ISA.
733 //
734 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
735 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
736 // critical section. On Intel(R) 64, all atomic operations are done with fetch
737 // and add or compare and exchange. Therefore, the FLAG parameter to this
738 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
739 // require a critical section and which we predict the Gnu codegen will
740 // implement by calling GOMP_atomic_start() / GOMP_atomic_end()).
741 //
742 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
743 // the FLAG parameter should always be 1. If we know that we will be using
744 // a critical section, then we want to make certain that we use the generic
745 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
746 // locks that are specialized based upon the size or type of the data.
747 //
748 // If FLAG is 0, then we are relying on dead code elimination by the build
749 // compiler to get rid of the useless block of code, and save a needless
750 // branch at runtime.
751 
752 #ifdef KMP_GOMP_COMPAT
753 #define OP_GOMP_CRITICAL(OP, FLAG) \
754  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
755  KMP_CHECK_GTID; \
756  OP_CRITICAL(OP, 0); \
757  return; \
758  }
759 #else
760 #define OP_GOMP_CRITICAL(OP, FLAG)
761 #endif /* KMP_GOMP_COMPAT */
762 
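// For example, with KMP_GOMP_COMPAT defined, OP_GOMP_CRITICAL(+=, KMP_ARCH_X86)
// expands (approximately) to:
//
//   if ((KMP_ARCH_X86) && (__kmp_atomic_mode == 2)) {
//     KMP_CHECK_GTID;
//     OP_CRITICAL(+=, 0); // generic lock __kmp_atomic_lock, see above
//     return;
//   }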
763 #if KMP_MIC
764 #define KMP_DO_PAUSE _mm_delay_32(1)
765 #else
766 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
767 #endif /* KMP_MIC */
768 
769 // ------------------------------------------------------------------------
770 // Operation on *lhs, rhs using "compare_and_store" routine
771 // TYPE - operands' type
772 // BITS - size in bits, used to distinguish low level calls
773 // OP - operator
774 #define OP_CMPXCHG(TYPE, BITS, OP) \
775  { \
776  TYPE old_value, new_value; \
777  old_value = *(TYPE volatile *)lhs; \
778  new_value = old_value OP rhs; \
779  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
780  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
781  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
782  KMP_DO_PAUSE; \
783  \
784  old_value = *(TYPE volatile *)lhs; \
785  new_value = old_value OP rhs; \
786  } \
787  }
788 
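// For example, OP_CMPXCHG(kmp_real32, 32, +) expands (approximately) to the
// retry loop below; note that the compare-and-store operates on the raw 32-bit
// pattern of the float:
//
//   {
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//     while (!KMP_COMPARE_AND_STORE_ACQ32(
//         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + rhs;
//     }
//   }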
789 #if USE_CMPXCHG_FIX
790 // 2007-06-25:
791 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
792 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
793 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
794 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
795 // the workaround.
796 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
797  { \
798  struct _sss { \
799  TYPE cmp; \
800  kmp_int##BITS *vvv; \
801  }; \
802  struct _sss old_value, new_value; \
803  old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
804  new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
805  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
806  new_value.cmp = old_value.cmp OP rhs; \
807  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
808  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
809  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
810  KMP_DO_PAUSE; \
811  \
812  *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
813  new_value.cmp = old_value.cmp OP rhs; \
814  } \
815  }
816 // end of the first part of the workaround for C78287
817 #endif // USE_CMPXCHG_FIX
818 
819 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
820 
821 // ------------------------------------------------------------------------
822 // X86 or X86_64: no alignment problems ====================================
823 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
824  GOMP_FLAG) \
825  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
826  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
827  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
828  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
829  }
830 // -------------------------------------------------------------------------
831 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
832  GOMP_FLAG) \
833  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
834  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
835  OP_CMPXCHG(TYPE, BITS, OP) \
836  }
837 #if USE_CMPXCHG_FIX
838 // -------------------------------------------------------------------------
839 // workaround for C78287 (complex(kind=4) data type)
840 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
841  MASK, GOMP_FLAG) \
842  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
843  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
844  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
845  }
846 // end of the second part of the workaround for C78287
847 #endif
848 
849 #else
850 // -------------------------------------------------------------------------
851 // Code for other architectures that don't handle unaligned accesses.
852 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
853  GOMP_FLAG) \
854  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
855  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
856  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
857  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
858  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
859  } else { \
860  KMP_CHECK_GTID; \
861  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
862  } \
863  }
864 // -------------------------------------------------------------------------
865 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
866  GOMP_FLAG) \
867  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
868  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
869  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
870  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
871  } else { \
872  KMP_CHECK_GTID; \
873  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
874  } \
875  }
876 #if USE_CMPXCHG_FIX
877 // -------------------------------------------------------------------------
878 // workaround for C78287 (complex(kind=4) data type)
879 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
880  MASK, GOMP_FLAG) \
881  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
882  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
883  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
884  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
885  } else { \
886  KMP_CHECK_GTID; \
887  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
888  } \
889  }
890 // end of the second part of the workaround for C78287
891 #endif // USE_CMPXCHG_FIX
892 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
893 
894 // Routines for ATOMIC 4-byte operands addition and subtraction
895 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
896  0) // __kmpc_atomic_fixed4_add
897 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
898  0) // __kmpc_atomic_fixed4_sub
899 
900 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
901  KMP_ARCH_X86) // __kmpc_atomic_float4_add
902 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
903  KMP_ARCH_X86) // __kmpc_atomic_float4_sub
904 
905 // Routines for ATOMIC 8-byte operands addition and subtraction
906 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
907  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
908 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
909  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
910 
911 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
912  KMP_ARCH_X86) // __kmpc_atomic_float8_add
913 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
914  KMP_ARCH_X86) // __kmpc_atomic_float8_sub
915 
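// For reference, ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0)
// above generates (approximately) the following entry point (IA-32 /
// Intel(R) 64 variant shown):
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     // OP_GOMP_CRITICAL(+=, 0) compiles away (GOMP_FLAG is 0)
//     KMP_TEST_THEN_ADD32(lhs, + rhs); // single fetch-and-add, no retry loop
//   }
//
// The ATOMIC_CMPXCHG entries (e.g. __kmpc_atomic_float4_add) use the
// OP_CMPXCHG retry loop instead of a single fetch-and-add.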
916 // ------------------------------------------------------------------------
917 // Entries definition for integer operands
918 // TYPE_ID - operands type and size (fixed4, float4)
919 // OP_ID - operation identifier (add, sub, mul, ...)
920 // TYPE - operand type
921 // BITS - size in bits, used to distinguish low level calls
922 // OP - operator (used in critical section)
923 // LCK_ID - lock identifier, used to possibly distinguish lock variable
924 // MASK - used for alignment check
925 
926 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
927 // ------------------------------------------------------------------------
928 // Routines for ATOMIC integer operands, other operators
929 // ------------------------------------------------------------------------
930 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
931 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
932  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
933 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
934  0) // __kmpc_atomic_fixed1_andb
935 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
936  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
937 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
938  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
939 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
940  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
941 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
942  0) // __kmpc_atomic_fixed1_orb
943 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
944  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
945 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
946  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
947 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
948  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
949 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
950  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
951 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
952  0) // __kmpc_atomic_fixed1_xor
953 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
954  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
955 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
956  0) // __kmpc_atomic_fixed2_andb
957 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
958  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
959 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
960  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
961 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
962  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
963 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
964  0) // __kmpc_atomic_fixed2_orb
965 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
966  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
967 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
968  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
969 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
970  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
971 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
972  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
973 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
974  0) // __kmpc_atomic_fixed2_xor
975 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
976  0) // __kmpc_atomic_fixed4_andb
977 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
978  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
979 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
980  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
981 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
982  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
983 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
984  0) // __kmpc_atomic_fixed4_orb
985 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
986  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
987 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
988  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
989 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
990  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
991 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
992  0) // __kmpc_atomic_fixed4_xor
993 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
994  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
995 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
996  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
997 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
998  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
999 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1000  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1001 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1002  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1003 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1004  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1005 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1006  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1007 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1008  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1009 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1010  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1011 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1012  KMP_ARCH_X86) // __kmpc_atomic_float4_div
1013 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1014  KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1015 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1016  KMP_ARCH_X86) // __kmpc_atomic_float8_div
1017 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1018  KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1019 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1020 
1021 /* ------------------------------------------------------------------------ */
1022 /* Routines for C/C++ Reduction operators && and || */
1023 
1024 // ------------------------------------------------------------------------
1025 // Need separate macros for &&, || because there is no combined assignment
1026 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1027 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1028  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1029  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1030  OP_CRITICAL(= *lhs OP, LCK_ID) \
1031  }
1032 
1033 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1034 
1035 // ------------------------------------------------------------------------
1036 // X86 or X86_64: no alignment problems ===================================
1037 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1038  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1039  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1040  OP_CMPXCHG(TYPE, BITS, OP) \
1041  }
1042 
1043 #else
1044 // ------------------------------------------------------------------------
1045 // Code for other architectures that don't handle unaligned accesses.
1046 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1047  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1048  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1049  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1050  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1051  } else { \
1052  KMP_CHECK_GTID; \
1053  OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1054  } \
1055  }
1056 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1057 
1058 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1059  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1060 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1061  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1062 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1063  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1064 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1065  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1066 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1067  0) // __kmpc_atomic_fixed4_andl
1068 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1069  0) // __kmpc_atomic_fixed4_orl
1070 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1071  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1072 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1073  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1074 
1075 /* ------------------------------------------------------------------------- */
1076 /* Routines for Fortran operators that have no C counterpart: */
1077 /* MAX, MIN, .EQV., .NEQV. */
1078 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1079 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1080 
1081 // -------------------------------------------------------------------------
1082 // MIN and MAX need separate macros
1083 // OP - comparison operator used to check whether any action is needed
1084 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1085  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1086  \
1087  if (*lhs OP rhs) { /* still need actions? */ \
1088  *lhs = rhs; \
1089  } \
1090  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1091 
1092 // -------------------------------------------------------------------------
1093 #ifdef KMP_GOMP_COMPAT
1094 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1095  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1096  KMP_CHECK_GTID; \
1097  MIN_MAX_CRITSECT(OP, 0); \
1098  return; \
1099  }
1100 #else
1101 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1102 #endif /* KMP_GOMP_COMPAT */
1103 
1104 // -------------------------------------------------------------------------
1105 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1106  { \
1107  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1108  TYPE old_value; \
1109  temp_val = *lhs; \
1110  old_value = temp_val; \
1111  while (old_value OP rhs && /* still need actions? */ \
1112  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1113  (kmp_int##BITS *)lhs, \
1114  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1115  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1116  KMP_CPU_PAUSE(); \
1117  temp_val = *lhs; \
1118  old_value = temp_val; \
1119  } \
1120  }
1121 
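// For example, MIN_MAX_CMPXCHG(kmp_int32, 32, <), used below for
// __kmpc_atomic_fixed4_max, expands (approximately) to:
//
//   {
//     kmp_int32 volatile temp_val;
//     kmp_int32 old_value;
//     temp_val = *lhs;
//     old_value = temp_val;
//     while (old_value < rhs && /* still need to store rhs? */
//            !KMP_COMPARE_AND_STORE_ACQ32(
//                (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//                *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//       KMP_CPU_PAUSE();
//       temp_val = *lhs;
//       old_value = temp_val;
//     }
//   }
//
// i.e. rhs is stored only while it is still greater than the current *lhs.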
1122 // -------------------------------------------------------------------------
1123 // 1-byte, 2-byte operands - use critical section
1124 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1125  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1126  if (*lhs OP rhs) { /* need actions? */ \
1127  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1128  MIN_MAX_CRITSECT(OP, LCK_ID) \
1129  } \
1130  }
1131 
1132 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1133 
1134 // -------------------------------------------------------------------------
1135 // X86 or X86_64: no alignment problems ====================================
1136 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1137  GOMP_FLAG) \
1138  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1139  if (*lhs OP rhs) { \
1140  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1141  MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1142  } \
1143  }
1144 
1145 #else
1146 // -------------------------------------------------------------------------
1147 // Code for other architectures that don't handle unaligned accesses.
1148 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1149  GOMP_FLAG) \
1150  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1151  if (*lhs OP rhs) { \
1152  GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1153  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1154  MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1155  } else { \
1156  KMP_CHECK_GTID; \
1157  MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1158  } \
1159  } \
1160  }
1161 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1162 
1163 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1164  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1165 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1166  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1167 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1168  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1169 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1170  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1171 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1172  0) // __kmpc_atomic_fixed4_max
1173 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1174  0) // __kmpc_atomic_fixed4_min
1175 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1176  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1177 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1178  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1179 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1180  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1181 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1182  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1183 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1184  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1185 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1186  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1187 #if KMP_HAVE_QUAD
1188 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1189  1) // __kmpc_atomic_float16_max
1190 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1191  1) // __kmpc_atomic_float16_min
1192 #if (KMP_ARCH_X86)
1193 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1194  1) // __kmpc_atomic_float16_max_a16
1195 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1196  1) // __kmpc_atomic_float16_min_a16
1197 #endif
1198 #endif
1199 // ------------------------------------------------------------------------
1200 // Need separate macros for .EQV. because it requires a complement (~)
1201 // OP is ignored for critical sections; ^= ~ is used instead
1202 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1203  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1204  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1205  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
1206  }
1207 
1208 // ------------------------------------------------------------------------
1209 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1210 // ------------------------------------------------------------------------
1211 // X86 or X86_64: no alignment problems ===================================
1212 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1213  GOMP_FLAG) \
1214  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1215  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1216  OP_CMPXCHG(TYPE, BITS, OP) \
1217  }
1218 // ------------------------------------------------------------------------
1219 #else
1220 // ------------------------------------------------------------------------
1221 // Code for other architectures that don't handle unaligned accesses.
1222 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1223  GOMP_FLAG) \
1224  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1225  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
1226  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1227  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1228  } else { \
1229  KMP_CHECK_GTID; \
1230  OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
1231  } \
1232  }
1233 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1234 
1235 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1236  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1237 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1238  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1239 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1240  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1241 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1242  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1243 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1244  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1245 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1246  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1247 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1248  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1249 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1250  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1251 
1252 // ------------------------------------------------------------------------
1253 // Routines for Extended types: long double, _Quad, complex flavours (use
1254 // critical section)
1255 // TYPE_ID, OP_ID, TYPE - detailed above
1256 // OP - operator
1257 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1258 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1259  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1260  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1261  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1262  }
1263 
1264 /* ------------------------------------------------------------------------- */
1265 // routines for long double type
1266 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1267  1) // __kmpc_atomic_float10_add
1268 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1269  1) // __kmpc_atomic_float10_sub
1270 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1271  1) // __kmpc_atomic_float10_mul
1272 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1273  1) // __kmpc_atomic_float10_div
1274 #if KMP_HAVE_QUAD
1275 // routines for _Quad type
1276 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1277  1) // __kmpc_atomic_float16_add
1278 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1279  1) // __kmpc_atomic_float16_sub
1280 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1281  1) // __kmpc_atomic_float16_mul
1282 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1283  1) // __kmpc_atomic_float16_div
1284 #if (KMP_ARCH_X86)
1285 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1286  1) // __kmpc_atomic_float16_add_a16
1287 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1288  1) // __kmpc_atomic_float16_sub_a16
1289 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1290  1) // __kmpc_atomic_float16_mul_a16
1291 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1292  1) // __kmpc_atomic_float16_div_a16
1293 #endif
1294 #endif
1295 // routines for complex types
1296 
1297 #if USE_CMPXCHG_FIX
1298 // workaround for C78287 (complex(kind=4) data type)
1299 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1300  1) // __kmpc_atomic_cmplx4_add
1301 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1302  1) // __kmpc_atomic_cmplx4_sub
1303 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1304  1) // __kmpc_atomic_cmplx4_mul
1305 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1306  1) // __kmpc_atomic_cmplx4_div
1307 // end of the workaround for C78287
1308 #else
1309 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1310 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1311 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1312 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1313 #endif // USE_CMPXCHG_FIX
1314 
1315 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1316 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1317 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1318 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1319 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1320  1) // __kmpc_atomic_cmplx10_add
1321 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1322  1) // __kmpc_atomic_cmplx10_sub
1323 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1324  1) // __kmpc_atomic_cmplx10_mul
1325 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1326  1) // __kmpc_atomic_cmplx10_div
1327 #if KMP_HAVE_QUAD
1328 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1329  1) // __kmpc_atomic_cmplx16_add
1330 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1331  1) // __kmpc_atomic_cmplx16_sub
1332 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1333  1) // __kmpc_atomic_cmplx16_mul
1334 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1335  1) // __kmpc_atomic_cmplx16_div
1336 #if (KMP_ARCH_X86)
1337 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1338  1) // __kmpc_atomic_cmplx16_add_a16
1339 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1340  1) // __kmpc_atomic_cmplx16_sub_a16
1341 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1342  1) // __kmpc_atomic_cmplx16_mul_a16
1343 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1344  1) // __kmpc_atomic_cmplx16_div_a16
1345 #endif
1346 #endif
1347 
1348 #if OMP_40_ENABLED
1349 
1350 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1351 // Supported only on IA-32 architecture and Intel(R) 64
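// For example, __kmpc_atomic_float8_sub_rev(id_ref, gtid, lhs, rhs) atomically
// performs *lhs = rhs - *lhs, whereas __kmpc_atomic_float8_sub performs
// *lhs = *lhs - rhs.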
1352 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1353 
1354 // ------------------------------------------------------------------------
1355 // Operation on *lhs, rhs bound by critical section
1356 // OP - operator (it's supposed to contain an assignment)
1357 // LCK_ID - lock identifier
1358 // Note: don't check gtid as it should always be valid
1359 // 1- and 2-byte operands: gtid expected valid; others: check it before this macro
1360 #define OP_CRITICAL_REV(OP, LCK_ID) \
1361  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1362  \
1363  (*lhs) = (rhs)OP(*lhs); \
1364  \
1365  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1366 
1367 #ifdef KMP_GOMP_COMPAT
1368 #define OP_GOMP_CRITICAL_REV(OP, FLAG) \
1369  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1370  KMP_CHECK_GTID; \
1371  OP_CRITICAL_REV(OP, 0); \
1372  return; \
1373  }
1374 #else
1375 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1376 #endif /* KMP_GOMP_COMPAT */
1377 
1378 // Beginning of a definition (provides name, parameters, debug trace)
1379 // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1380 // fixed types)
1381 // OP_ID - operation identifier (add, sub, mul, ...)
1382 // TYPE - operands' type
1383 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1384  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1385  TYPE *lhs, TYPE rhs) { \
1386  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1387  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1388 
1389 // ------------------------------------------------------------------------
1390 // Operation on *lhs, rhs using "compare_and_store" routine
1391 // TYPE - operands' type
1392 // BITS - size in bits, used to distinguish low level calls
1393 // OP - operator
1394 // Note: temp_val introduced in order to force the compiler to read
1395 // *lhs only once (w/o it the compiler reads *lhs twice)
1396 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1397  { \
1398  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1399  TYPE old_value, new_value; \
1400  temp_val = *lhs; \
1401  old_value = temp_val; \
1402  new_value = rhs OP old_value; \
1403  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1404  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1405  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1406  KMP_DO_PAUSE; \
1407  \
1408  temp_val = *lhs; \
1409  old_value = temp_val; \
1410  new_value = rhs OP old_value; \
1411  } \
1412  }
1413 
1414 // -------------------------------------------------------------------------
1415 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1416  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1417  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1418  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1419  }
1420 
1421 // ------------------------------------------------------------------------
1422 // Entries definition for integer operands
1423 // TYPE_ID - operands type and size (fixed4, float4)
1424 // OP_ID - operation identifier (add, sub, mul, ...)
1425 // TYPE - operand type
1426 // BITS - size in bits, used to distinguish low level calls
1427 // OP - operator (used in critical section)
1428 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1429 
1430 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1431 // ------------------------------------------------------------------------
1432 // Routines for ATOMIC integer operands, other operators
1433 // ------------------------------------------------------------------------
1434 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1435 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1436  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1437 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1438  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1439 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1440  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1441 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1442  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1443 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1444  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1445 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1446  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1447 
1448 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1449  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1450 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1451  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1452 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1453  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1454 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1455  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1456 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1457  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1458 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1459  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1460 
1461 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1462  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1463 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1464  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1465 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1466  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1467 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1468  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1469 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1470  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1471 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1472  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1473 
1474 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1475  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1476 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1477  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1478 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1479  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1480 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1481  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1482 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1483  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1484 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1485  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1486 
1487 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1488  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1489 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1490  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1491 
1492 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1493  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1494 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1495  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1496 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1497 
1498 // ------------------------------------------------------------------------
1499 // Routines for Extended types: long double, _Quad, complex flavours (use
1500 // critical section)
1501 // TYPE_ID, OP_ID, TYPE - detailed above
1502 // OP - operator
1503 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1504 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1505  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1506  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1507  OP_CRITICAL_REV(OP, LCK_ID) \
1508  }
1509 
1510 /* ------------------------------------------------------------------------- */
1511 // routines for long double type
1512 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1513  1) // __kmpc_atomic_float10_sub_rev
1514 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1515  1) // __kmpc_atomic_float10_div_rev
1516 #if KMP_HAVE_QUAD
1517 // routines for _Quad type
1518 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1519  1) // __kmpc_atomic_float16_sub_rev
1520 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1521  1) // __kmpc_atomic_float16_div_rev
1522 #if (KMP_ARCH_X86)
1523 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1524  1) // __kmpc_atomic_float16_sub_a16_rev
1525 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1526  1) // __kmpc_atomic_float16_div_a16_rev
1527 #endif
1528 #endif
1529 
1530 // routines for complex types
1531 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1532  1) // __kmpc_atomic_cmplx4_sub_rev
1533 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1534  1) // __kmpc_atomic_cmplx4_div_rev
1535 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1536  1) // __kmpc_atomic_cmplx8_sub_rev
1537 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1538  1) // __kmpc_atomic_cmplx8_div_rev
1539 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1540  1) // __kmpc_atomic_cmplx10_sub_rev
1541 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1542  1) // __kmpc_atomic_cmplx10_div_rev
1543 #if KMP_HAVE_QUAD
1544 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1545  1) // __kmpc_atomic_cmplx16_sub_rev
1546 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1547  1) // __kmpc_atomic_cmplx16_div_rev
1548 #if (KMP_ARCH_X86)
1549 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1550  1) // __kmpc_atomic_cmplx16_sub_a16_rev
1551 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1552  1) // __kmpc_atomic_cmplx16_div_a16_rev
1553 #endif
1554 #endif
1555 
1556 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1557 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1558 
1559 #endif // OMP_40_ENABLED
1560 
1561 /* ------------------------------------------------------------------------ */
1562 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1563 /* Note: in order to reduce the total number of type combinations, */
1564 /* it is assumed that the compiler converts RHS to the longest floating */
1565 /* type, that is _Quad, before calling any of these routines. */
1566 /* The conversion to _Quad is done by the compiler during the calculation, */
1567 /* and the conversion back to TYPE before the assignment, like: */
1568 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1569 /* A performance penalty is expected because of software emulation. */
1570 /* ------------------------------------------------------------------------ */
1571 
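// For example, __kmpc_atomic_float4_add_fp(id_ref, gtid, lhs, rhs), where lhs
// points to a kmp_real32 and rhs is a _Quad, performs (approximately)
//
//   *lhs = (kmp_real32)((_Quad)(*lhs) + rhs);
//
// with the compare-and-store loop operating on the 32-bit representation of
// *lhs.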
1572 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1573  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1574  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1575  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1576  KA_TRACE(100, \
1577  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1578  gtid));
1579 
1580 // -------------------------------------------------------------------------
1581 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1582  GOMP_FLAG) \
1583  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1584  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1585  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1586  }
1587 
1588 // -------------------------------------------------------------------------
1589 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1590 // -------------------------------------------------------------------------
1591 // X86 or X86_64: no alignment problems ====================================
1592 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1593  LCK_ID, MASK, GOMP_FLAG) \
1594  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1595  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1596  OP_CMPXCHG(TYPE, BITS, OP) \
1597  }
1598 // -------------------------------------------------------------------------
1599 #else
1600 // ------------------------------------------------------------------------
1601 // Code for other architectures that don't handle unaligned accesses.
1602 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1603  LCK_ID, MASK, GOMP_FLAG) \
1604  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1605  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1606  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1607  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1608  } else { \
1609  KMP_CHECK_GTID; \
1610  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1611  } \
1612  }
1613 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1614 
1615 // -------------------------------------------------------------------------
1616 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // -------------------------------------------------------------------------
1618 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1619  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1620  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1621  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1622  OP_CMPXCHG_REV(TYPE, BITS, OP) \
1623  }
1624 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1625  LCK_ID, GOMP_FLAG) \
1626  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1627  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1628  OP_CRITICAL_REV(OP, LCK_ID) \
1629  }
1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1631 
1632 // RHS=float8
1633 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1634  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1635 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1636  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1637 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1638  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1639 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1640  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1641 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1642  0) // __kmpc_atomic_fixed4_mul_float8
1643 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1644  0) // __kmpc_atomic_fixed4_div_float8
1645 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1646  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1647 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1648  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1649 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1650  KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1651 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1652  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1653 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1654  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1655 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1656  KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1657 
1658 // RHS=float16 (deprecated, to be removed once we are sure the compiler does
1659 // not use these entry points)
1660 #if KMP_HAVE_QUAD
1661 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1662  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1663 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1664  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1665 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1666  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1667 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1668  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1669 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1670  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1671 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1672  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1673 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1674  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1675 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1676  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1677 
1678 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1679  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1680 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1681  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1683  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1684 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1685  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1687  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1688 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1689  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1691  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1692 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1693  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1694 
1695 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1696  0) // __kmpc_atomic_fixed4_add_fp
1697 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1698  0) // __kmpc_atomic_fixed4u_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1700  0) // __kmpc_atomic_fixed4_sub_fp
1701 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1702  0) // __kmpc_atomic_fixed4u_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1704  0) // __kmpc_atomic_fixed4_mul_fp
1705 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1706  0) // __kmpc_atomic_fixed4u_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1708  0) // __kmpc_atomic_fixed4_div_fp
1709 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1710  0) // __kmpc_atomic_fixed4u_div_fp
1711 
1712 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1713  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1715  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1717  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1719  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1721  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1723  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1725  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1726 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1727  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1728 
1729 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1730  KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1731 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1732  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1733 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1734  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1735 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1736  KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1737 
1738 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1739  KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1740 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1741  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1742 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1743  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1744 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1745  KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1746 
1747 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1748  1) // __kmpc_atomic_float10_add_fp
1749 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1750  1) // __kmpc_atomic_float10_sub_fp
1751 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1752  1) // __kmpc_atomic_float10_mul_fp
1753 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1754  1) // __kmpc_atomic_float10_div_fp
1755 
1756 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1757 // Reverse operations
1758 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1759  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1760 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1761  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1762 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1763  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1764 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1765  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1766 
1767 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1768  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1769 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1770  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1771 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1772  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1773 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1774  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1775 
1776 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1777  0) // __kmpc_atomic_fixed4_sub_rev_fp
1778 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1779  0) // __kmpc_atomic_fixed4u_sub_rev_fp
1780 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1781  0) // __kmpc_atomic_fixed4_div_rev_fp
1782 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1783  0) // __kmpc_atomic_fixed4u_div_rev_fp
1784 
1785 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1786  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1787 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1788  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1789 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1790  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1791 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1792  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1793 
1794 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1795  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1797  KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1798 
1799 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1800  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1801 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1802  KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1803 
1804 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1805  1) // __kmpc_atomic_float10_sub_rev_fp
1806 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1807  1) // __kmpc_atomic_float10_div_rev_fp
1808 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1809 
1810 #endif
1811 
1812 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1813 // ------------------------------------------------------------------------
1814 // X86 or X86_64: no alignment problems ====================================
1815 #if USE_CMPXCHG_FIX
1816 // workaround for C78287 (complex(kind=4) data type)
1817 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1818  LCK_ID, MASK, GOMP_FLAG) \
1819  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1820  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1821  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1822  }
1823 // end of the second part of the workaround for C78287
1824 #else
1825 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1826  LCK_ID, MASK, GOMP_FLAG) \
1827  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1828  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1829  OP_CMPXCHG(TYPE, BITS, OP) \
1830  }
1831 #endif // USE_CMPXCHG_FIX
1832 #else
1833 // ------------------------------------------------------------------------
1834 // Code for other architectures that don't handle unaligned accesses.
1835 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1836  LCK_ID, MASK, GOMP_FLAG) \
1837  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1838  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1839  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1840  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1841  } else { \
1842  KMP_CHECK_GTID; \
1843  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1844  } \
1845  }
1846 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1847 
1848 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1849  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1850 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1851  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1852 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1853  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1854 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1855  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
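// ------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream source): the four entries
// above update a kmp_cmplx32 location with a kmp_cmplx64 right-hand side.
// On x86/x86_64 the 8-byte LHS is updated with a 64-bit cmpxchg loop (or the
// C78287 workaround); elsewhere the MASK=7 alignment test falls back to the
// 8c critical section.  Assuming the ATOMIC_BEGIN_MIX prologue defined
// earlier in this file, the generated signature is along the lines of
//   void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid,
//                                        kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
// ------------------------------------------------------------------------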
1856 
1857 // Atomic READ, WRITE, and CAPTURE routines are supported only on IA-32 architecture and Intel(R) 64
1858 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1859 
1860 // ------------------------------------------------------------------------
1861 // Atomic READ routines
1862 
1863 // ------------------------------------------------------------------------
1864 // Beginning of a definition (provides name, parameters, debug trace)
1865 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1866 // fixed)
1867 // OP_ID - operation identifier (add, sub, mul, ...)
1868 // TYPE - operands' type
1869 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1870  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1871  TYPE *loc) { \
1872  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1873  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1874 
1875 // ------------------------------------------------------------------------
1876 // Atomic read of *loc using "compare_and_store_ret" routine
1877 // TYPE - operands' type
1878 // BITS - size in bits, used to distinguish low level calls
1879 // OP - operator
1880 // Note: temp_val introduced in order to force the compiler to read
1881 // *loc only once (w/o it the compiler reads *loc twice)
1882 // TODO: check if it is still necessary
1883 // Return the old value regardless of the result of the "compare & swap" operation
1884 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1885  { \
1886  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1887  union f_i_union { \
1888  TYPE f_val; \
1889  kmp_int##BITS i_val; \
1890  }; \
1891  union f_i_union old_value; \
1892  temp_val = *loc; \
1893  old_value.f_val = temp_val; \
1894  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1895  (kmp_int##BITS *)loc, \
1896  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1897  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1898  new_value = old_value.f_val; \
1899  return new_value; \
1900  }
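// ------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream source): OP_CMPXCHG_READ
// issues a compare-and-store with identical "expected" and "new" values, so
// memory is never modified but the returned bits are the atomically observed
// contents; the union reinterprets the floating-point pattern as an integer
// of the same width.  Simplified (volatile casts omitted) for kmp_real32:
//   union { kmp_real32 f_val; kmp_int32 i_val; } old_value;
//   old_value.f_val = *loc;                                // initial snapshot
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET32(
//       (kmp_int32 *)loc, old_value.i_val, old_value.i_val); // no-op store
//   return old_value.f_val;                                // atomic read result
// ------------------------------------------------------------------------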
1901 
1902 // -------------------------------------------------------------------------
1903 // Operation on *lhs, rhs bound by critical section
1904 // OP - operator (it's supposed to contain an assignment)
1905 // LCK_ID - lock identifier
1906 // Note: don't check gtid as it should always be valid
1907 // 1- and 2-byte cases expect a valid gtid; other sizes must check it before this macro
1908 #define OP_CRITICAL_READ(OP, LCK_ID) \
1909  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1910  \
1911  new_value = (*loc); \
1912  \
1913  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1914 
1915 // -------------------------------------------------------------------------
1916 #ifdef KMP_GOMP_COMPAT
1917 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1918  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1919  KMP_CHECK_GTID; \
1920  OP_CRITICAL_READ(OP, 0); \
1921  return new_value; \
1922  }
1923 #else
1924 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1925 #endif /* KMP_GOMP_COMPAT */
1926 
1927 // -------------------------------------------------------------------------
1928 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1929  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1930  TYPE new_value; \
1931  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1932  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1933  return new_value; \
1934  }
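// ------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream source): ATOMIC_FIXED_READ
// performs the read as an atomic fetch-and-add of zero, so the fixed4
// instantiation further below expands roughly to
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     kmp_int32 new_value = KMP_TEST_THEN_ADD32(loc, +0); // returns *loc atomically
//     return new_value;
//   }
// (debug assertion, trace, and GOMP-compatibility code omitted).
// ------------------------------------------------------------------------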
1935 // -------------------------------------------------------------------------
1936 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1937  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1938  TYPE new_value; \
1939  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1940  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1941  }
1942 // ------------------------------------------------------------------------
1943 // Routines for Extended types: long double, _Quad, complex flavours (use
1944 // critical section)
1945 // TYPE_ID, OP_ID, TYPE - detailed above
1946 // OP - operator
1947 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1948 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1949  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1950  TYPE new_value; \
1951  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1952  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1953  return new_value; \
1954  }
1955 
1956 // ------------------------------------------------------------------------
1957 // Fix for cmplx4 read (CQ220361) on Windows* OS: a regular routine with a
1958 // return value doesn't work there, so the read value is returned
1959 // through an additional parameter instead.
1960 #if (KMP_OS_WINDOWS)
1961 
1962 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1963  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1964  \
1965  (*out) = (*loc); \
1966  \
1967  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1968 // ------------------------------------------------------------------------
1969 #ifdef KMP_GOMP_COMPAT
1970 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1971  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1972  KMP_CHECK_GTID; \
1973  OP_CRITICAL_READ_WRK(OP, 0); \
1974  }
1975 #else
1976 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1977 #endif /* KMP_GOMP_COMPAT */
1978 // ------------------------------------------------------------------------
1979 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1980  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1981  TYPE *loc) { \
1982  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1983  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1984 
1985 // ------------------------------------------------------------------------
1986 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1987  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1988  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
1989  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
1990  }
1991 
1992 #endif // KMP_OS_WINDOWS
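// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): on Windows* OS the
// cmplx4 read entry generated by ATOMIC_CRITICAL_READ_WRK returns void and
// hands the value back through the leading "out" parameter,
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);
// while other platforms get the usual value-returning form from
// ATOMIC_CRITICAL_READ.
// ------------------------------------------------------------------------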
1993 
1994 // ------------------------------------------------------------------------
1995 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1996 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1997 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1998  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1999 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2000  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2001 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2002  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2003 
2004 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2005 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2006  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2007 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2008  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2009 
2010 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2011  1) // __kmpc_atomic_float10_rd
2012 #if KMP_HAVE_QUAD
2013 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2014  1) // __kmpc_atomic_float16_rd
2015 #endif // KMP_HAVE_QUAD
2016 
2017 // Fix for CQ220361 on Windows* OS
2018 #if (KMP_OS_WINDOWS)
2019 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2020  1) // __kmpc_atomic_cmplx4_rd
2021 #else
2022 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2023  1) // __kmpc_atomic_cmplx4_rd
2024 #endif
2025 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2026  1) // __kmpc_atomic_cmplx8_rd
2027 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2028  1) // __kmpc_atomic_cmplx10_rd
2029 #if KMP_HAVE_QUAD
2030 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2031  1) // __kmpc_atomic_cmplx16_rd
2032 #if (KMP_ARCH_X86)
2033 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2034  1) // __kmpc_atomic_float16_a16_rd
2035 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2036  1) // __kmpc_atomic_cmplx16_a16_rd
2037 #endif
2038 #endif
2039 
2040 // ------------------------------------------------------------------------
2041 // Atomic WRITE routines
2042 
2043 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2044  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2045  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2046  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2047  }
2048 // ------------------------------------------------------------------------
2049 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2050  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2051  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2052  KMP_XCHG_REAL##BITS(lhs, rhs); \
2053  }
2054 
2055 // ------------------------------------------------------------------------
2056 // Operation on *lhs, rhs using "compare_and_store" routine
2057 // TYPE - operands' type
2058 // BITS - size in bits, used to distinguish low level calls
2059 // OP - operator
2060 // Note: temp_val introduced in order to force the compiler to read
2061 // *lhs only once (w/o it the compiler reads *lhs twice)
2062 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2063  { \
2064  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2065  TYPE old_value, new_value; \
2066  temp_val = *lhs; \
2067  old_value = temp_val; \
2068  new_value = rhs; \
2069  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2070  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2071  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2072  KMP_CPU_PAUSE(); \
2073  \
2074  temp_val = *lhs; \
2075  old_value = temp_val; \
2076  new_value = rhs; \
2077  } \
2078  }
2079 
2080 // -------------------------------------------------------------------------
2081 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2082  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2083  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2084  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2085  }
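// ------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream source): an atomic write is
// normally a plain exchange (KMP_XCHG_FIXED* / KMP_XCHG_REAL*); the
// cmpxchg-based variant above is used for 8-byte operands on 32-bit x86 (see
// the fixed8/float8 instantiations below), presumably because no 64-bit
// exchange primitive is available there.  Simplified loop for kmp_int64:
//   kmp_int64 old_value = *lhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs, old_value, rhs)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs;            // re-read and retry until the store lands
//   }
// ------------------------------------------------------------------------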
2086 
2087 // ------------------------------------------------------------------------
2088 // Routines for Extended types: long double, _Quad, complex flavours (use
2089 // critical section)
2090 // TYPE_ID, OP_ID, TYPE - detailed above
2091 // OP - operator
2092 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2093 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2094  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2095  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2096  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2097  }
2098 // -------------------------------------------------------------------------
2099 
2100 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2101  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2102 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2103  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2104 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2105  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2106 #if (KMP_ARCH_X86)
2107 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2108  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2109 #else
2110 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2111  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2112 #endif
2113 
2114 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2115  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2116 #if (KMP_ARCH_X86)
2117 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2118  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2119 #else
2120 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2121  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2122 #endif
2123 
2124 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2125  1) // __kmpc_atomic_float10_wr
2126 #if KMP_HAVE_QUAD
2127 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2128  1) // __kmpc_atomic_float16_wr
2129 #endif
2130 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2131 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2132  1) // __kmpc_atomic_cmplx8_wr
2133 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2134  1) // __kmpc_atomic_cmplx10_wr
2135 #if KMP_HAVE_QUAD
2136 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2137  1) // __kmpc_atomic_cmplx16_wr
2138 #if (KMP_ARCH_X86)
2139 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2140  1) // __kmpc_atomic_float16_a16_wr
2141 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2142  1) // __kmpc_atomic_cmplx16_a16_wr
2143 #endif
2144 #endif
2145 
2146 // ------------------------------------------------------------------------
2147 // Atomic CAPTURE routines
2148 
2149 // Beginning of a definition (provides name, parameters, debug trace)
2150 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2151 // fixed)
2152 // OP_ID - operation identifier (add, sub, mul, ...)
2153 // TYPE - operands' type
2154 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2155  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2156  TYPE *lhs, TYPE rhs, int flag) { \
2157  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2158  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2159 
2160 // -------------------------------------------------------------------------
2161 // Operation on *lhs, rhs bound by critical section
2162 // OP - operator (it's supposed to contain an assignment)
2163 // LCK_ID - lock identifier
2164 // Note: don't check gtid as it should always be valid
2165 // 1- and 2-byte cases expect a valid gtid; other sizes must check it before this macro
2166 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2167  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2168  \
2169  if (flag) { \
2170  (*lhs) OP rhs; \
2171  new_value = (*lhs); \
2172  } else { \
2173  new_value = (*lhs); \
2174  (*lhs) OP rhs; \
2175  } \
2176  \
2177  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2178  return new_value;
2179 
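// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): the "flag" argument
// selects the captured value -- nonzero captures the value *after* the update
// (v = x binop= expr), zero captures the value *before* it (v = x; x binop= expr).
// A compiler could therefore lower, for a 32-bit int x,
//   #pragma omp atomic capture
//   { v = x; x += expr; }
// into a call along the lines of (caller-side names are illustrative):
//   v = __kmpc_atomic_fixed4_add_cpt(&loc_ident, gtid, &x, expr, 0 /* old */);
// ------------------------------------------------------------------------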
2180 // ------------------------------------------------------------------------
2181 #ifdef KMP_GOMP_COMPAT
2182 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
2183  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2184  KMP_CHECK_GTID; \
2185  OP_CRITICAL_CPT(OP## =, 0); \
2186  }
2187 #else
2188 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2189 #endif /* KMP_GOMP_COMPAT */
2190 
2191 // ------------------------------------------------------------------------
2192 // Operation on *lhs, rhs using "compare_and_store" routine
2193 // TYPE - operands' type
2194 // BITS - size in bits, used to distinguish low level calls
2195 // OP - operator
2196 // Note: temp_val introduced in order to force the compiler to read
2197 // *lhs only once (w/o it the compiler reads *lhs twice)
2198 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2199  { \
2200  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2201  TYPE old_value, new_value; \
2202  temp_val = *lhs; \
2203  old_value = temp_val; \
2204  new_value = old_value OP rhs; \
2205  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2206  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2207  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2208  KMP_CPU_PAUSE(); \
2209  \
2210  temp_val = *lhs; \
2211  old_value = temp_val; \
2212  new_value = old_value OP rhs; \
2213  } \
2214  if (flag) { \
2215  return new_value; \
2216  } else \
2217  return old_value; \
2218  }
2219 
2220 // -------------------------------------------------------------------------
2221 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2222  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2223  TYPE new_value; \
2224  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2225  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2226  }
2227 
2228 // -------------------------------------------------------------------------
2229 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2230  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2231  TYPE old_value, new_value; \
2232  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2233  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2234  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2235  if (flag) { \
2236  return old_value OP rhs; \
2237  } else \
2238  return old_value; \
2239  }
2240 // -------------------------------------------------------------------------
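// Illustrative sketch (not part of the upstream source): for plain +/- the
// capture avoids a cmpxchg loop entirely -- KMP_TEST_THEN_ADD returns the
// pre-update value and the post-update value is recomputed locally when
// flag != 0.  The fixed8 add_cpt instantiation below behaves roughly like
//   old_value = KMP_TEST_THEN_ADD64(lhs, +rhs);
//   return flag ? old_value + rhs : old_value;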
2241 
2242 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2243  0) // __kmpc_atomic_fixed4_add_cpt
2244 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2245  0) // __kmpc_atomic_fixed4_sub_cpt
2246 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2247  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2248 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2249  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2250 
2251 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2252  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2253 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2254  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2255 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2256  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2257 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2258  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2259 
2260 // ------------------------------------------------------------------------
2261 // Entries definition for integer operands
2262 // TYPE_ID - operands type and size (fixed4, float4)
2263 // OP_ID - operation identifier (add, sub, mul, ...)
2264 // TYPE - operand type
2265 // BITS - size in bits, used to distinguish low level calls
2266 // OP - operator (used in critical section)
2267 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2268 // ------------------------------------------------------------------------
2269 // Routines for ATOMIC integer operands, other operators
2270 // ------------------------------------------------------------------------
2271 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2272 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2273  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2274 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2275  0) // __kmpc_atomic_fixed1_andb_cpt
2276 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2277  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2278 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2279  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2280 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2281  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2282 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2283  0) // __kmpc_atomic_fixed1_orb_cpt
2284 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2285  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2286 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2287  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2288 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2289  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2290 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2291  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2292 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2293  0) // __kmpc_atomic_fixed1_xor_cpt
2294 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2295  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2296 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2297  0) // __kmpc_atomic_fixed2_andb_cpt
2298 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2299  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2300 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2301  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2302 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2303  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2304 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2305  0) // __kmpc_atomic_fixed2_orb_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2307  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2309  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2311  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2313  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2315  0) // __kmpc_atomic_fixed2_xor_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2317  0) // __kmpc_atomic_fixed4_andb_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2319  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2321  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2323  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2325  0) // __kmpc_atomic_fixed4_orb_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2327  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2329  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2331  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2333  0) // __kmpc_atomic_fixed4_xor_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2335  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2337  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2339  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2341  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2343  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2345  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2347  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2349  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2351  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2352 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2353  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2354 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2355  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2356 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2357  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2358 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2359  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2360 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2361 
2362 // CAPTURE routines for mixed types RHS=float16
2363 #if KMP_HAVE_QUAD
2364 
2365 // Beginning of a definition (provides name, parameters, debug trace)
2366 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2367 // fixed)
2368 // OP_ID - operation identifier (add, sub, mul, ...)
2369 // TYPE - operands' type
2370 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2371  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2372  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2373  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2374  KA_TRACE(100, \
2375  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2376  gtid));
2377 
2378 // -------------------------------------------------------------------------
2379 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2380  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2381  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2382  TYPE new_value; \
2383  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2384  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2385  }
2386 
2387 // -------------------------------------------------------------------------
2388 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2389  LCK_ID, GOMP_FLAG) \
2390  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2391  TYPE new_value; \
2392  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2393  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2394  }
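// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): the _MIX capture
// entries take the RHS in the wider type while returning the (narrower) LHS
// type; per ATOMIC_BEGIN_CPT_MIX above, the first instantiation below yields
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid,
//                                        char *lhs, _Quad rhs, int flag);
// so a char location can be updated by a _Quad expression and the captured
// value comes back as char.
// ------------------------------------------------------------------------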
2395 
2396 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2397  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2398 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2399  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2400 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2401  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2402 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2403  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2404 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2405  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2406 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2407  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2408 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2409  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2410 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2411  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2412 
2413 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2414  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2415 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2416  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2417 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2418  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2419 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2420  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2421 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2422  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2423 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2424  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2426  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2428  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2429 
2430 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2431  0) // __kmpc_atomic_fixed4_add_cpt_fp
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2433  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2435  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2437  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2439  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2441  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2443  0) // __kmpc_atomic_fixed4_div_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2445  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2446 
2447 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2448  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2450  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2452  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2454  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2456  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2458  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2460  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2462  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2463 
2464 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2465  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2467  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2469  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2471  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2472 
2473 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2474  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2475 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2476  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2478  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2480  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2481 
2482 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2483  1) // __kmpc_atomic_float10_add_cpt_fp
2484 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2485  1) // __kmpc_atomic_float10_sub_cpt_fp
2486 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2487  1) // __kmpc_atomic_float10_mul_cpt_fp
2488 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2489  1) // __kmpc_atomic_float10_div_cpt_fp
2490 
2491 #endif // KMP_HAVE_QUAD
2492 
2493 // ------------------------------------------------------------------------
2494 // Routines for C/C++ Reduction operators && and ||
2495 
2496 // -------------------------------------------------------------------------
2497 // Operation on *lhs, rhs bound by critical section
2498 // OP - operator (it's supposed to contain an assignment)
2499 // LCK_ID - lock identifier
2500 // Note: don't check gtid as it should always be valid
2501 // 1- and 2-byte cases expect a valid gtid; other sizes must check it before this macro
2502 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2503  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2504  \
2505  if (flag) { \
2506  new_value OP rhs; \
2507  } else \
2508  new_value = (*lhs); \
2509  \
2510  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2511 
2512 // ------------------------------------------------------------------------
2513 #ifdef KMP_GOMP_COMPAT
2514 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2515  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2516  KMP_CHECK_GTID; \
2517  OP_CRITICAL_L_CPT(OP, 0); \
2518  return new_value; \
2519  }
2520 #else
2521 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2522 #endif /* KMP_GOMP_COMPAT */
2523 
2524 // ------------------------------------------------------------------------
2525 // Need separate macros for &&, || because there is no combined assignment
2526 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2527  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2528  TYPE new_value; \
2529  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2530  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2531  }
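// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): C has no "&&=" / "||=",
// so the GOMP path is handed the token sequence "= *lhs OP"; inside
// OP_CRITICAL_L_CPT, "new_value OP rhs" then expands to, e.g.,
//   new_value = *lhs && rhs;
// while the lock-free path lets OP_CMPXCHG_CPT retry
//   new_value = old_value && rhs;
// in its compare-and-store loop.
// ------------------------------------------------------------------------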
2532 
2533 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2534  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2535 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2536  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2537 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2538  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2539 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2540  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2541 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2542  0) // __kmpc_atomic_fixed4_andl_cpt
2543 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2544  0) // __kmpc_atomic_fixed4_orl_cpt
2545 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2546  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2547 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2548  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2549 
2550 // -------------------------------------------------------------------------
2551 // Routines for Fortran operators that have no direct C counterpart:
2552 // MAX, MIN, .EQV., .NEQV.
2553 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2554 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2555 
2556 // -------------------------------------------------------------------------
2557 // MIN and MAX need separate macros
2558 // OP - operator used to check whether any action is still needed
2559 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2560  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2561  \
2562  if (*lhs OP rhs) { /* still need actions? */ \
2563  old_value = *lhs; \
2564  *lhs = rhs; \
2565  if (flag) \
2566  new_value = rhs; \
2567  else \
2568  new_value = old_value; \
2569  } \
2570  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2571  return new_value;
2572 
2573 // -------------------------------------------------------------------------
2574 #ifdef KMP_GOMP_COMPAT
2575 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2576  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2577  KMP_CHECK_GTID; \
2578  MIN_MAX_CRITSECT_CPT(OP, 0); \
2579  }
2580 #else
2581 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2582 #endif /* KMP_GOMP_COMPAT */
2583 
2584 // -------------------------------------------------------------------------
2585 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2586  { \
2587  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2588  /*TYPE old_value; */ \
2589  temp_val = *lhs; \
2590  old_value = temp_val; \
2591  while (old_value OP rhs && /* still need actions? */ \
2592  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2593  (kmp_int##BITS *)lhs, \
2594  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2595  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2596  KMP_CPU_PAUSE(); \
2597  temp_val = *lhs; \
2598  old_value = temp_val; \
2599  } \
2600  if (flag) \
2601  return rhs; \
2602  else \
2603  return old_value; \
2604  }
2605 
2606 // -------------------------------------------------------------------------
2607 // 1-byte, 2-byte operands - use critical section
2608 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2609  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2610  TYPE new_value, old_value; \
2611  if (*lhs OP rhs) { /* need actions? */ \
2612  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2613  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2614  } \
2615  return *lhs; \
2616  }
2617 
2618 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2619  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2620  TYPE new_value, old_value; \
2621  if (*lhs OP rhs) { \
2622  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2623  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2624  } \
2625  return *lhs; \
2626  }
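// ------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream source): for MIN/MAX the
// comparison doubles as the "still need actions?" test.  A rough expansion of
// the 32-bit signed max case (OP is "<", GOMP-compatibility path omitted):
//   if (*lhs < rhs) {                       // update may be needed
//     old_value = *lhs;
//     while (old_value < rhs &&
//            !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, rhs)) {
//       KMP_CPU_PAUSE();
//       old_value = *lhs;                   // lost the race; re-check
//     }
//     return flag ? rhs : old_value;        // captured value per the flag
//   }
//   return *lhs;                            // no update needed
// ------------------------------------------------------------------------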
2627 
2628 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2629  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2630 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2631  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2632 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2633  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2634 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2635  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2636 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2637  0) // __kmpc_atomic_fixed4_max_cpt
2638 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2639  0) // __kmpc_atomic_fixed4_min_cpt
2640 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2641  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2642 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2643  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2644 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2645  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2646 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2647  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2648 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2649  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2650 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2651  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2652 #if KMP_HAVE_QUAD
2653 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2654  1) // __kmpc_atomic_float16_max_cpt
2655 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2656  1) // __kmpc_atomic_float16_min_cpt
2657 #if (KMP_ARCH_X86)
2658 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2659  1) // __kmpc_atomic_float16_max_a16_cpt
2660 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2661  1) // __kmpc_atomic_float16_min_a16_cpt
2662 #endif
2663 #endif
2664 
2665 // ------------------------------------------------------------------------
2666 #ifdef KMP_GOMP_COMPAT
2667 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2668  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2669  KMP_CHECK_GTID; \
2670  OP_CRITICAL_CPT(OP, 0); \
2671  }
2672 #else
2673 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2674 #endif /* KMP_GOMP_COMPAT */
2675 // ------------------------------------------------------------------------
2676 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2677  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2678  TYPE new_value; \
2679  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
2680  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2681  }
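// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): Fortran .EQV. is
// bitwise XNOR, so the OP passed to the eqv_cpt entries below is the token
// pair "^~" and OP_CMPXCHG_CPT computes
//   new_value = old_value ^ ~rhs;
// while the GOMP critical path receives "^= ~", i.e. (*lhs) ^= ~rhs.
// .NEQV. maps onto the plain "^" (neqv_cpt) entries, also instantiated below.
// ------------------------------------------------------------------------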
2682 
2683 // ------------------------------------------------------------------------
2684 
2685 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2686  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2687 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2688  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2689 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2690  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2691 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2692  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2693 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2694  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2695 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2696  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2697 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2698  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2699 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2700  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2701 
2702 // ------------------------------------------------------------------------
2703 // Routines for Extended types: long double, _Quad, complex flavours (use
2704 // critical section)
2705 // TYPE_ID, OP_ID, TYPE - detailed above
2706 // OP - operator
2707 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2708 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2709  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2710  TYPE new_value; \
2711  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2712  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2713  }
2714 
2715 // ------------------------------------------------------------------------
2716 // Workaround for cmplx4: regular routines with a return value don't work
2717 // on Win_32e, so the captured values are returned through an additional parameter.
2718 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2719  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2720  \
2721  if (flag) { \
2722  (*lhs) OP rhs; \
2723  (*out) = (*lhs); \
2724  } else { \
2725  (*out) = (*lhs); \
2726  (*lhs) OP rhs; \
2727  } \
2728  \
2729  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2730  return;
2731 // ------------------------------------------------------------------------
2732 
2733 #ifdef KMP_GOMP_COMPAT
2734 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2735  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2736  KMP_CHECK_GTID; \
2737  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2738  }
2739 #else
2740 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2741 #endif /* KMP_GOMP_COMPAT */
2742 // ------------------------------------------------------------------------
2743 
2744 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2745  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2746  TYPE rhs, TYPE *out, int flag) { \
2747  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2748  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2749 // ------------------------------------------------------------------------
2750 
2751 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2752  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2753  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2754  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2755  }
2756 // The end of workaround for cmplx4
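// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): the cmplx4 capture
// entries generated by ATOMIC_CRITICAL_CPT_WRK return void and deliver the
// captured value through the trailing "out" parameter, per ATOMIC_BEGIN_WRK:
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
// The other complex flavours keep the value-returning ATOMIC_CRITICAL_CPT form.
// ------------------------------------------------------------------------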
2757 
2758 /* ------------------------------------------------------------------------- */
2759 // routines for long double type
2760 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2761  1) // __kmpc_atomic_float10_add_cpt
2762 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2763  1) // __kmpc_atomic_float10_sub_cpt
2764 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2765  1) // __kmpc_atomic_float10_mul_cpt
2766 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2767  1) // __kmpc_atomic_float10_div_cpt
2768 #if KMP_HAVE_QUAD
2769 // routines for _Quad type
2770 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2771  1) // __kmpc_atomic_float16_add_cpt
2772 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2773  1) // __kmpc_atomic_float16_sub_cpt
2774 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2775  1) // __kmpc_atomic_float16_mul_cpt
2776 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2777  1) // __kmpc_atomic_float16_div_cpt
2778 #if (KMP_ARCH_X86)
2779 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2780  1) // __kmpc_atomic_float16_add_a16_cpt
2781 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2782  1) // __kmpc_atomic_float16_sub_a16_cpt
2783 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2784  1) // __kmpc_atomic_float16_mul_a16_cpt
2785 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2786  1) // __kmpc_atomic_float16_div_a16_cpt
2787 #endif
2788 #endif
2789 
2790 // routines for complex types
2791 
2792 // cmplx4 routines to return void
2793 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2794  1) // __kmpc_atomic_cmplx4_add_cpt
2795 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2796  1) // __kmpc_atomic_cmplx4_sub_cpt
2797 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2798  1) // __kmpc_atomic_cmplx4_mul_cpt
2799 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2800  1) // __kmpc_atomic_cmplx4_div_cpt
2801 
2802 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2803  1) // __kmpc_atomic_cmplx8_add_cpt
2804 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2805  1) // __kmpc_atomic_cmplx8_sub_cpt
2806 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2807  1) // __kmpc_atomic_cmplx8_mul_cpt
2808 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2809  1) // __kmpc_atomic_cmplx8_div_cpt
2810 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2811  1) // __kmpc_atomic_cmplx10_add_cpt
2812 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2813  1) // __kmpc_atomic_cmplx10_sub_cpt
2814 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2815  1) // __kmpc_atomic_cmplx10_mul_cpt
2816 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2817  1) // __kmpc_atomic_cmplx10_div_cpt
2818 #if KMP_HAVE_QUAD
2819 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2820  1) // __kmpc_atomic_cmplx16_add_cpt
2821 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2822  1) // __kmpc_atomic_cmplx16_sub_cpt
2823 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2824  1) // __kmpc_atomic_cmplx16_mul_cpt
2825 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2826  1) // __kmpc_atomic_cmplx16_div_cpt
2827 #if (KMP_ARCH_X86)
2828 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2829  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2830 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2831  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2832 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2833  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2834 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2835  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2836 #endif
2837 #endif
2838 
2839 #if OMP_40_ENABLED
2840 
2841 // OpenMP 4.0 capture forms for non-commutative operations:
2842 // v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }
2843 // Supported only on IA-32 architecture and Intel(R) 64
2844 
2845 // -------------------------------------------------------------------------
2846 // Operation on *lhs, rhs bound by critical section
2847 // OP - operator (it's supposed to contain an assignment)
2848 // LCK_ID - lock identifier
2849 // Note: don't check gtid as it should always be valid
2850 // 1- and 2-byte cases expect a valid gtid; other sizes must check it before this macro
2851 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2852  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2853  \
2854  if (flag) { \
2855  /*temp_val = (*lhs);*/ \
2856  (*lhs) = (rhs)OP(*lhs); \
2857  new_value = (*lhs); \
2858  } else { \
2859  new_value = (*lhs); \
2860  (*lhs) = (rhs)OP(*lhs); \
2861  } \
2862  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2863  return new_value;
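// ------------------------------------------------------------------------
// Illustrative note (not part of the upstream source): the _REV capture
// entries apply the operands in reverse order, (*lhs) = rhs OP (*lhs), which
// is what the non-commutative capture forms above require.  For a double x,
//   #pragma omp atomic capture
//   { v = x; x = expr / x; }
// could be lowered to a call along the lines of (caller names illustrative):
//   v = __kmpc_atomic_float8_div_cpt_rev(&loc_ident, gtid, &x, expr, 0);
// ------------------------------------------------------------------------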
2864 
2865 // ------------------------------------------------------------------------
2866 #ifdef KMP_GOMP_COMPAT
2867 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
2868  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2869  KMP_CHECK_GTID; \
2870  OP_CRITICAL_CPT_REV(OP, 0); \
2871  }
2872 #else
2873 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2874 #endif /* KMP_GOMP_COMPAT */
2875 
2876 // ------------------------------------------------------------------------
2877 // Operation on *lhs, rhs using "compare_and_store" routine
2878 // TYPE - operands' type
2879 // BITS - size in bits, used to distinguish low level calls
2880 // OP - operator
2881 // Note: temp_val introduced in order to force the compiler to read
2882 // *lhs only once (w/o it the compiler reads *lhs twice)
2883 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2884  { \
2885  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2886  TYPE old_value, new_value; \
2887  temp_val = *lhs; \
2888  old_value = temp_val; \
2889  new_value = rhs OP old_value; \
2890  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2891  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2892  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2893  KMP_CPU_PAUSE(); \
2894  \
2895  temp_val = *lhs; \
2896  old_value = temp_val; \
2897  new_value = rhs OP old_value; \
2898  } \
2899  if (flag) { \
2900  return new_value; \
2901  } else \
2902  return old_value; \
2903  }
2904 
2905 // -------------------------------------------------------------------------
2906 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2907  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2908  TYPE new_value; \
2909  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2910  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2911  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2912  }
2913 
2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2915  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2917  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2919  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2921  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2923  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2925  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2927  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2929  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2931  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2933  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2935  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2937  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2939  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2941  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2943  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2945  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2947  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2949  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2951  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2953  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2955  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2957  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2959  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2961  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2963  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2965  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2967  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2969  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2970 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2971 
2972 // ------------------------------------------------------------------------
2973 // Routines for Extended types: long double, _Quad, complex flavours (use
2974 // critical section)
2975 // TYPE_ID, OP_ID, TYPE - detailed above
2976 // OP - operator
2977 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2978 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2979  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2980  TYPE new_value; \
2981  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2982  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
2983  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2984  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2985  }
2986 
2987 /* ------------------------------------------------------------------------- */
2988 // routines for long double type
2989 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2990  1) // __kmpc_atomic_float10_sub_cpt_rev
2991 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2992  1) // __kmpc_atomic_float10_div_cpt_rev
2993 #if KMP_HAVE_QUAD
2994 // routines for _Quad type
2995 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2996  1) // __kmpc_atomic_float16_sub_cpt_rev
2997 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2998  1) // __kmpc_atomic_float16_div_cpt_rev
2999 #if (KMP_ARCH_X86)
3000 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3001  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3002 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3003  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3004 #endif
3005 #endif
3006 
3007 // routines for complex types
3008 
3009 // ------------------------------------------------------------------------
3010 // Workaround for cmplx4. Regular routines with a return value don't work
3011 // on Win_32e, so captured values are returned through an additional parameter.
3012 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3013  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3014  \
3015  if (flag) { \
3016  (*lhs) = (rhs)OP(*lhs); \
3017  (*out) = (*lhs); \
3018  } else { \
3019  (*out) = (*lhs); \
3020  (*lhs) = (rhs)OP(*lhs); \
3021  } \
3022  \
3023  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3024  return;
3025 // ------------------------------------------------------------------------
3026 
3027 #ifdef KMP_GOMP_COMPAT
3028 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3029  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3030  KMP_CHECK_GTID; \
3031  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3032  }
3033 #else
3034 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3035 #endif /* KMP_GOMP_COMPAT */
3036 // ------------------------------------------------------------------------
3037 
3038 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3039  GOMP_FLAG) \
3040  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3041  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3042  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3043  }
3044 // The end of workaround for cmplx4
3045 
3046 // !!! TODO: check if we need to return void for cmplx4 routines
3047 // cmplx4 routines to return void
3048 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3049  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3050 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3051  1) // __kmpc_atomic_cmplx4_div_cpt_rev
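// A hedged call sketch for the void cmplx4 entries just instantiated; the
// exact parameter list comes from ATOMIC_BEGIN_WRK (defined earlier in this
// file, not shown here) and is assumed here to append the capture target and
// flag after rhs. 'loc' and 'gtid' are placeholders.
//
//   kmp_cmplx32 x, v, expr;
//   // { x = expr - x; v = x; }  -- capture the new value
//   __kmpc_atomic_cmplx4_sub_cpt_rev(&loc, gtid, &x, expr, &v, 1 /* flag */);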
3052 
3053 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3054  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3055 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3056  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3057 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3058  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3059 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3060  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3061 #if KMP_HAVE_QUAD
3062 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3063  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3064 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3065  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3066 #if (KMP_ARCH_X86)
3067 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3068  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3069 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3070  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3071 #endif
3072 #endif
3073 
3074 // Capture reverse for mixed type: RHS=float16
3075 #if KMP_HAVE_QUAD
3076 
3077 // Beginning of a definition (provides name, parameters, debug trace)
3078 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3079 // fixed)
3080 // OP_ID - operation identifier (add, sub, mul, ...)
3081 // TYPE - operands' type
3082 // -------------------------------------------------------------------------
3083 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3084  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3085  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3086  TYPE new_value; \
3087  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
3088  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3089  }
3090 
3091 // -------------------------------------------------------------------------
3092 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3093  LCK_ID, GOMP_FLAG) \
3094  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3095  TYPE new_value; \
3096  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
3097  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
3098  }
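// Source-level sketch of the mixed-type case these *_fp entries cover: the
// left-hand side keeps its own type while the right-hand side is a _Quad
// expression. Assuming a 32-bit int, an Intel-style compiler targeting this
// runtime would lower the construct below roughly to the entry instantiated
// further down and named in the comment.
//
//   int x, v;
//   _Quad q;
//   #pragma omp atomic capture
//   { v = x; x = q - x; }   // -> __kmpc_atomic_fixed4_sub_cpt_rev_fp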
3099 
3100 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3101  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3103  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3105  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3106 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3107  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3108 
3109 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3110  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3111 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3112  1,
3113  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3114 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3115  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3116 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3117  1,
3118  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3119 
3120 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3121  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3123  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3124 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3125  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3126 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3127  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3128 
3129 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3130  7,
3131  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3132 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3133  8i, 7,
3134  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3135 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3136  7,
3137  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3138 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3139  8i, 7,
3140  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3141 
3142 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3143  4r, 3,
3144  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3145 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3146  4r, 3,
3147  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3148 
3149 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3150  8r, 7,
3151  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3152 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3153  8r, 7,
3154  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3155 
3156 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3157  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3158 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3159  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3160 
3161 #endif // KMP_HAVE_QUAD
3162 
3163 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3164 
3165 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3166  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3167  TYPE rhs) { \
3168  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3169  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3170 
3171 #define CRITICAL_SWP(LCK_ID) \
3172  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3173  \
3174  old_value = (*lhs); \
3175  (*lhs) = rhs; \
3176  \
3177  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3178  return old_value;
3179 
3180 // ------------------------------------------------------------------------
3181 #ifdef KMP_GOMP_COMPAT
3182 #define GOMP_CRITICAL_SWP(FLAG) \
3183  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3184  KMP_CHECK_GTID; \
3185  CRITICAL_SWP(0); \
3186  }
3187 #else
3188 #define GOMP_CRITICAL_SWP(FLAG)
3189 #endif /* KMP_GOMP_COMPAT */
3190 
3191 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3192  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3193  TYPE old_value; \
3194  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3195  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3196  return old_value; \
3197  }
3198 // ------------------------------------------------------------------------
3199 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3200  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3201  TYPE old_value; \
3202  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3203  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3204  return old_value; \
3205  }
3206 
3207 // ------------------------------------------------------------------------
3208 #define CMPXCHG_SWP(TYPE, BITS) \
3209  { \
3210  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3211  TYPE old_value, new_value; \
3212  temp_val = *lhs; \
3213  old_value = temp_val; \
3214  new_value = rhs; \
3215  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3216  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3217  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3218  KMP_CPU_PAUSE(); \
3219  \
3220  temp_val = *lhs; \
3221  old_value = temp_val; \
3222  new_value = rhs; \
3223  } \
3224  return old_value; \
3225  }
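// The loop above is the usual compare-and-swap retry idiom: reread the
// location whenever another thread changed it between the read and the CAS.
// A minimal standalone illustration of the same pattern with std::atomic
// (for exposition only; the runtime itself uses the KMP_COMPARE_AND_STORE_*
// primitives):
//
//   #include <atomic>
//   static int atomic_swap(std::atomic<int> &loc, int new_value) {
//     int old_value = loc.load();
//     // compare_exchange_weak reloads old_value on failure, so the loop
//     // simply retries until no concurrent update intervened
//     while (!loc.compare_exchange_weak(old_value, new_value)) {
//     }
//     return old_value;
//   }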
3226 
3227 // -------------------------------------------------------------------------
3228 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3229  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3230  TYPE old_value; \
3231  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3232  CMPXCHG_SWP(TYPE, BITS) \
3233  }
3234 
3235 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3236 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3237 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3238 
3239 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3240  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3241 
3242 #if (KMP_ARCH_X86)
3243 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3244  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3245 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3246  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3247 #else
3248 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3249 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3250  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3251 #endif
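// Usage sketch for the swap entries instantiated above (signature given by
// ATOMIC_BEGIN_SWP): the old value is returned, so a capture-write on a
// 4-byte int is a single call. 'loc' and 'gtid' are placeholders.
//
//   kmp_int32 x, v, new_val;
//   // { v = x; x = new_val; }
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, new_val);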
3252 
3253 // ------------------------------------------------------------------------
3254 // Routines for Extended types: long double, _Quad, complex flavours (use
3255 // critical section)
3256 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3257  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3258  TYPE old_value; \
3259  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3260  CRITICAL_SWP(LCK_ID) \
3261  }
3262 
3263 // ------------------------------------------------------------------------
3264 // !!! TODO: check if we need to return void for cmplx4 routines
3265 // Workaround for cmplx4. Regular routines with a return value don't work
3266 // on Win_32e, so captured values are returned through an additional parameter.
3267 
3268 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3269  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3270  TYPE rhs, TYPE *out) { \
3271  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3272  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3273 
3274 #define CRITICAL_SWP_WRK(LCK_ID) \
3275  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3276  \
3277  tmp = (*lhs); \
3278  (*lhs) = (rhs); \
3279  (*out) = tmp; \
3280  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3281  return;
3282 // ------------------------------------------------------------------------
3283 
3284 #ifdef KMP_GOMP_COMPAT
3285 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3286  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3287  KMP_CHECK_GTID; \
3288  CRITICAL_SWP_WRK(0); \
3289  }
3290 #else
3291 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3292 #endif /* KMP_GOMP_COMPAT */
3293 // ------------------------------------------------------------------------
3294 
3295 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3296  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3297  TYPE tmp; \
3298  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3299  CRITICAL_SWP_WRK(LCK_ID) \
3300  }
3301 // The end of workaround for cmplx4
3302 
3303 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3304 #if KMP_HAVE_QUAD
3305 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3306 #endif
3307 // cmplx4 routine to return void
3308 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
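// For cmplx4 the swap uses the void/out-parameter form declared by
// ATOMIC_BEGIN_SWP_WRK above; a sketch with placeholder 'loc' and 'gtid':
//
//   kmp_cmplx32 x, v, new_val;
//   // { v = x; x = new_val; }
//   __kmpc_atomic_cmplx4_swp(&loc, gtid, &x, new_val, &v);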
3309 
3310 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3311 // __kmpc_atomic_cmplx4_swp
3312 
3313 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3314 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3315 #if KMP_HAVE_QUAD
3316 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3317 #if (KMP_ARCH_X86)
3318 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3319  1) // __kmpc_atomic_float16_a16_swp
3320 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3321  1) // __kmpc_atomic_cmplx16_a16_swp
3322 #endif
3323 #endif
3324 
3325 // End of OpenMP 4.0 Capture
3326 
3327 #endif // OMP_40_ENABLED
3328 
3329 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3330 
3331 #undef OP_CRITICAL
3332 
3333 /* ------------------------------------------------------------------------ */
3334 /* Generic atomic routines */
3335 
3336 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3337  void (*f)(void *, void *, void *)) {
3338  KMP_DEBUG_ASSERT(__kmp_init_serial);
3339 
3340  if (
3341 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3342  FALSE /* must use lock */
3343 #else
3344  TRUE
3345 #endif
3346  ) {
3347  kmp_int8 old_value, new_value;
3348 
3349  old_value = *(kmp_int8 *)lhs;
3350  (*f)(&new_value, &old_value, rhs);
3351 
3352  /* TODO: Should this be acquire or release? */
3353  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3354  *(kmp_int8 *)&new_value)) {
3355  KMP_CPU_PAUSE();
3356 
3357  old_value = *(kmp_int8 *)lhs;
3358  (*f)(&new_value, &old_value, rhs);
3359  }
3360 
3361  return;
3362  } else {
3363 // All 1-byte data is of integer data type.
3364 
3365 #ifdef KMP_GOMP_COMPAT
3366  if (__kmp_atomic_mode == 2) {
3367  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3368  } else
3369 #endif /* KMP_GOMP_COMPAT */
3370  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3371 
3372  (*f)(lhs, lhs, rhs);
3373 
3374 #ifdef KMP_GOMP_COMPAT
3375  if (__kmp_atomic_mode == 2) {
3376  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3377  } else
3378 #endif /* KMP_GOMP_COMPAT */
3379  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3380  }
3381 }
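// Usage sketch for the generic entry: the callback receives pointers
// (result, lhs value, rhs) and must store op(lhs, rhs) into result, matching
// the (*f)(&new_value, &old_value, rhs) calls above. The ident argument is
// not examined on this path, so a null pointer is passed in this sketch;
// 'gtid' is a placeholder.
//
//   static void add_int8(void *out, void *a, void *b) {
//     *(kmp_int8 *)out = (kmp_int8)(*(kmp_int8 *)a + *(kmp_int8 *)b);
//   }
//   ...
//   kmp_int8 x = 0, one = 1;
//   __kmpc_atomic_1(NULL, gtid, &x, &one, add_int8); // atomic x += 1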
3382 
3383 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3384  void (*f)(void *, void *, void *)) {
3385  if (
3386 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3387  FALSE /* must use lock */
3388 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3389  TRUE /* no alignment problems */
3390 #else
3391  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3392 #endif
3393  ) {
3394  kmp_int16 old_value, new_value;
3395 
3396  old_value = *(kmp_int16 *)lhs;
3397  (*f)(&new_value, &old_value, rhs);
3398 
3399  /* TODO: Should this be acquire or release? */
3400  while (!KMP_COMPARE_AND_STORE_ACQ16(
3401  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3402  KMP_CPU_PAUSE();
3403 
3404  old_value = *(kmp_int16 *)lhs;
3405  (*f)(&new_value, &old_value, rhs);
3406  }
3407 
3408  return;
3409  } else {
3410 // All 2-byte data is of integer data type.
3411 
3412 #ifdef KMP_GOMP_COMPAT
3413  if (__kmp_atomic_mode == 2) {
3414  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3415  } else
3416 #endif /* KMP_GOMP_COMPAT */
3417  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3418 
3419  (*f)(lhs, lhs, rhs);
3420 
3421 #ifdef KMP_GOMP_COMPAT
3422  if (__kmp_atomic_mode == 2) {
3423  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3424  } else
3425 #endif /* KMP_GOMP_COMPAT */
3426  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3427  }
3428 }
3429 
3430 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3431  void (*f)(void *, void *, void *)) {
3432  KMP_DEBUG_ASSERT(__kmp_init_serial);
3433 
3434  if (
3435 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3436 // Gomp compatibility is broken if this routine is called for floats.
3437 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3438  TRUE /* no alignment problems */
3439 #else
3440  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3441 #endif
3442  ) {
3443  kmp_int32 old_value, new_value;
3444 
3445  old_value = *(kmp_int32 *)lhs;
3446  (*f)(&new_value, &old_value, rhs);
3447 
3448  /* TODO: Should this be acquire or release? */
3449  while (!KMP_COMPARE_AND_STORE_ACQ32(
3450  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3451  KMP_CPU_PAUSE();
3452 
3453  old_value = *(kmp_int32 *)lhs;
3454  (*f)(&new_value, &old_value, rhs);
3455  }
3456 
3457  return;
3458  } else {
3459 // Use __kmp_atomic_lock_4i for all 4-byte data,
3460 // even if it isn't of integer data type.
3461 
3462 #ifdef KMP_GOMP_COMPAT
3463  if (__kmp_atomic_mode == 2) {
3464  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3465  } else
3466 #endif /* KMP_GOMP_COMPAT */
3467  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3468 
3469  (*f)(lhs, lhs, rhs);
3470 
3471 #ifdef KMP_GOMP_COMPAT
3472  if (__kmp_atomic_mode == 2) {
3473  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3474  } else
3475 #endif /* KMP_GOMP_COMPAT */
3476  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3477  }
3478 }
3479 
3480 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3481  void (*f)(void *, void *, void *)) {
3482  KMP_DEBUG_ASSERT(__kmp_init_serial);
3483  if (
3484 
3485 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3486  FALSE /* must use lock */
3487 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3488  TRUE /* no alignment problems */
3489 #else
3490  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3491 #endif
3492  ) {
3493  kmp_int64 old_value, new_value;
3494 
3495  old_value = *(kmp_int64 *)lhs;
3496  (*f)(&new_value, &old_value, rhs);
3497  /* TODO: Should this be acquire or release? */
3498  while (!KMP_COMPARE_AND_STORE_ACQ64(
3499  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3500  KMP_CPU_PAUSE();
3501 
3502  old_value = *(kmp_int64 *)lhs;
3503  (*f)(&new_value, &old_value, rhs);
3504  }
3505 
3506  return;
3507  } else {
3508 // Use __kmp_atomic_lock_8i for all 8-byte data,
3509 // even if it isn't of integer data type.
3510 
3511 #ifdef KMP_GOMP_COMPAT
3512  if (__kmp_atomic_mode == 2) {
3513  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3514  } else
3515 #endif /* KMP_GOMP_COMPAT */
3516  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3517 
3518  (*f)(lhs, lhs, rhs);
3519 
3520 #ifdef KMP_GOMP_COMPAT
3521  if (__kmp_atomic_mode == 2) {
3522  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3523  } else
3524 #endif /* KMP_GOMP_COMPAT */
3525  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3526  }
3527 }
3528 
3529 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3530  void (*f)(void *, void *, void *)) {
3531  KMP_DEBUG_ASSERT(__kmp_init_serial);
3532 
3533 #ifdef KMP_GOMP_COMPAT
3534  if (__kmp_atomic_mode == 2) {
3535  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3536  } else
3537 #endif /* KMP_GOMP_COMPAT */
3538  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3539 
3540  (*f)(lhs, lhs, rhs);
3541 
3542 #ifdef KMP_GOMP_COMPAT
3543  if (__kmp_atomic_mode == 2) {
3544  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3545  } else
3546 #endif /* KMP_GOMP_COMPAT */
3547  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3548 }
3549 
3550 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3551  void (*f)(void *, void *, void *)) {
3552  KMP_DEBUG_ASSERT(__kmp_init_serial);
3553 
3554 #ifdef KMP_GOMP_COMPAT
3555  if (__kmp_atomic_mode == 2) {
3556  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3557  } else
3558 #endif /* KMP_GOMP_COMPAT */
3559  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3560 
3561  (*f)(lhs, lhs, rhs);
3562 
3563 #ifdef KMP_GOMP_COMPAT
3564  if (__kmp_atomic_mode == 2) {
3565  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3566  } else
3567 #endif /* KMP_GOMP_COMPAT */
3568  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3569 }
3570 
3571 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3572  void (*f)(void *, void *, void *)) {
3573  KMP_DEBUG_ASSERT(__kmp_init_serial);
3574 
3575 #ifdef KMP_GOMP_COMPAT
3576  if (__kmp_atomic_mode == 2) {
3577  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3578  } else
3579 #endif /* KMP_GOMP_COMPAT */
3580  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3581 
3582  (*f)(lhs, lhs, rhs);
3583 
3584 #ifdef KMP_GOMP_COMPAT
3585  if (__kmp_atomic_mode == 2) {
3586  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3587  } else
3588 #endif /* KMP_GOMP_COMPAT */
3589  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3590 }
3591 
3592 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3593  void (*f)(void *, void *, void *)) {
3594  KMP_DEBUG_ASSERT(__kmp_init_serial);
3595 
3596 #ifdef KMP_GOMP_COMPAT
3597  if (__kmp_atomic_mode == 2) {
3598  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3599  } else
3600 #endif /* KMP_GOMP_COMPAT */
3601  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3602 
3603  (*f)(lhs, lhs, rhs);
3604 
3605 #ifdef KMP_GOMP_COMPAT
3606  if (__kmp_atomic_mode == 2) {
3607  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3608  } else
3609 #endif /* KMP_GOMP_COMPAT */
3610  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3611 }
3612 
3613 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3614 // compiler; duplicated so as not to use third-party names in pure Intel code
3615 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3616 void __kmpc_atomic_start(void) {
3617  int gtid = __kmp_entry_gtid();
3618  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3619  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3620 }
3621 
3622 void __kmpc_atomic_end(void) {
3623  int gtid = __kmp_get_gtid();
3624  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3625  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3626 }
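// Usage sketch: these bracket an update that has no specialized entry point,
// in the same way GOMP_atomic_start/end are used; the body between the calls
// is a hypothetical user computation.
//
//   __kmpc_atomic_start();
//   x = user_defined_combine(x, y); // hypothetical unsupported update
//   __kmpc_atomic_end();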
3627 
3632 // end of file