LLVM OpenMP* Runtime Library
kmp_atomic.cpp
/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;
/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;
/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                        \
  if (gtid == KMP_GTID_UNKNOWN) {                                             \
    gtid = __kmp_entry_gtid();                                                \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                          \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,       \
                                             TYPE *lhs, TYPE rhs) {           \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

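// Illustration only (not part of the build): with the macros above, a single
// generated entry point looks roughly like the sketch below; the body is
// supplied by whichever OP_* macro is paired with ATOMIC_BEGIN.
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     KMP_TEST_THEN_ADD32(lhs, rhs); // body from ATOMIC_FIXED_ADD on x86
//   }
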
// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*lhs) OP(rhs);                                                             \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                  \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs));                                       \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

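// Illustration only: OP_UPDATE_CRITICAL(kmp_real64, +, 8r) expands to a
// lock-protected read-modify-write, roughly:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8r, gtid);
//   (*lhs) = (kmp_real64)((*lhs) + ((kmp_real64)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_8r, gtid);
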
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP, 0);                                                       \
    return;                                                                   \
  }

#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                               \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_UPDATE_CRITICAL(TYPE, OP, 0);                                          \
    return;                                                                   \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

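// Illustration only: in a GOMP-compatible runtime (__kmp_atomic_mode == 2),
// an entry whose GOMP_FLAG is nonzero takes the generic-lock path above and
// returns early. A caller-side sketch (loc and gtid normally come from the
// compiler's codegen, not from user code):
//
//   kmp_int8 c = 0;
//   __kmpc_atomic_fixed1_add(&loc, gtid, &c, 1); // serialized through
//                                                // __kmp_atomic_lock
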
#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP)                                            \
  {                                                                           \
    TYPE old_value, new_value;                                                \
    old_value = *(TYPE volatile *)lhs;                                        \
    new_value = (TYPE)(old_value OP((TYPE)rhs));                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      old_value = *(TYPE volatile *)lhs;                                      \
      new_value = (TYPE)(old_value OP((TYPE)rhs));                            \
    }                                                                         \
  }

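// Illustration only: OP_CMPXCHG(kmp_real32, 32, +) produces a classic CAS
// retry loop of roughly this shape, reinterpreting the float bits as a
// 32-bit integer for the compare-and-store:
//
//   kmp_real32 old_value = *(kmp_real32 volatile *)lhs;
//   kmp_real32 new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(volatile kmp_int32 *)&old_value,
//                                       *(volatile kmp_int32 *)&new_value)) {
//     KMP_DO_PAUSE; // back off, then re-read and retry
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//   }
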
#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                 \
  {                                                                           \
    struct _sss {                                                             \
      TYPE cmp;                                                               \
      kmp_int##BITS *vvv;                                                     \
    };                                                                        \
    struct _sss old_value, new_value;                                         \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                          \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                          \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                          \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs);                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,  \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                     \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                        \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs);                           \
    }                                                                         \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
#undef OP_CMPXCHG
#define OP_CMPXCHG(TYPE, BITS, OP)                                            \
  {                                                                           \
    struct _sss {                                                             \
      TYPE cmp;                                                               \
      kmp_int##BITS *vvv;                                                     \
    };                                                                        \
    struct _sss old_value, new_value;                                         \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                          \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                          \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                          \
    new_value.cmp = old_value.cmp OP rhs;                                     \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,  \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                     \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                        \
      new_value.cmp = old_value.cmp OP rhs;                                   \
    }                                                                         \
  }

#undef OP_UPDATE_CRITICAL
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                  \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  (*lhs) = (*lhs)OP rhs;                                                      \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */           \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                       \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                       GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,     \
                                  MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                       \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */         \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                     \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_UPDATE_CRITICAL(TYPE, OP,                                              \
                       LCK_ID) /* unaligned address - use critical */         \
  }                                                                           \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                       GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_UPDATE_CRITICAL(TYPE, OP,                                              \
                       LCK_ID) /* unaligned address - use critical */         \
  }                                                                           \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,     \
                                  MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_UPDATE_CRITICAL(TYPE, OP,                                              \
                       LCK_ID) /* unaligned address - use critical */         \
  }                                                                           \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

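// Illustration only: for "#pragma omp atomic" on a 4-byte int the compiler
// emits a call such as the following (loc and gtid are supplied by codegen;
// this is a sketch, not a user-facing API):
//
//   kmp_int32 x = 0;
//   __kmpc_atomic_fixed4_add(&loc, gtid, &x, 5); // atomically x += 5
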
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                          */

// ------------------------------------------------------------------------
// Need separate macros for && and || because they have no compound-assignment
// forms
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                              \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */             \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

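// Illustration only: the "= *lhs OP" trick above folds assignment into the
// operator argument, so e.g. __kmpc_atomic_fixed4_andl atomically performs:
//
//   *lhs = *lhs && rhs;
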
/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C/C++ counterpart:             */
/* MAX, MIN, .EQV., .NEQV.                                                    */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}            */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}   */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether an update is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (*lhs OP rhs) { /* still need actions? */                                \
    *lhs = rhs;                                                               \
  }                                                                           \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    MIN_MAX_CRITSECT(OP, 0);                                                  \
    return;                                                                   \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                       \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value;                                                           \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    while (old_value OP rhs && /* still need actions? */                      \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
               (kmp_int##BITS *)lhs,                                          \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                   \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                      \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
    }                                                                         \
  }

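// Illustration only: MIN_MAX_CMPXCHG(kmp_int32, 32, <), as used by
// __kmpc_atomic_fixed4_max, loops until either no update is needed or the
// compare-and-store installs rhs (casts elided for readability):
//
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs && // max: replace only while rhs is larger
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, rhs)) {
//     old_value = *lhs; // lost the race - re-read and re-check
//   }
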
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)         \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) { /* need actions? */                                      \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                              \
  }                                                                           \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                           \
  }                                                                           \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                   \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                   \
    } else {                                                                  \
      KMP_CHECK_GTID;                                                         \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                    \
    }                                                                         \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because the complement (~) is needed
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */              \
  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */       \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                        GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */              \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                        GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                    \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */   \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

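// Illustration only: .EQV. is lowered as "*lhs ^= ~rhs"; bitwise,
// x ^ ~y == ~(x ^ y), so __kmpc_atomic_fixed4_eqv atomically performs:
//
//   *lhs = *lhs ^ ~rhs; // bits are set wherever *lhs and rhs agree
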
ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */          \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                  \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                     \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*lhs) = (TYPE)((rhs)OP(*lhs));                                             \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                  \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_REV(TYPE, OP, 0);                                             \
    return;                                                                   \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) {     \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                        \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value, new_value;                                                \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    new_value = (TYPE)(rhs OP old_value);                                     \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
      new_value = (TYPE)(rhs OP old_value);                                   \
    }                                                                         \
  }

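// Illustration only: the _rev entry points compute "rhs OP *lhs" instead of
// "*lhs OP rhs"; e.g. __kmpc_atomic_float8_sub_rev atomically performs:
//
//   *lhs = rhs - *lhs; // via the CAS retry loop above
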
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                   \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                              \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                   \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                           \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".             */
/* Note: in order to reduce the total number of type combinations it is       */
/* assumed that the compiler converts the RHS to the longest floating-point   */
/* type, that is _Quad, before the call to any of these routines.             */
/* Conversion to _Quad will be done by the compiler during calculation,       */
/* conversion back to TYPE - before the assignment, like:                     */
/*   *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                    */
/* Performance penalty expected because of SW emulation use.                  */
/* ------------------------------------------------------------------------ */

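// Illustration only (assuming a _Quad-capable compiler): for a float LHS
// updated with a wider RHS, codegen widens the RHS to _Quad and calls the
// mixed routine; loc and gtid come from codegen, not user code:
//
//   float x = 1.0f;
//   __kmpc_atomic_float4_add_fp(&loc, gtid, &x, (_Quad)1.25L);
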
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)               \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                        \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100,                                                             \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",  \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG)                                         \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */          \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                  \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,   \
                           LCK_ID, MASK, GOMP_FLAG)                           \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,   \
                           LCK_ID, MASK, GOMP_FLAG)                           \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_UPDATE_CRITICAL(TYPE, OP,                                              \
                       LCK_ID) /* unaligned address - use critical */         \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,      \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                   \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                              \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
                               LCK_ID, GOMP_FLAG)                             \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                   \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed when we are sure the compiler does
// not use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

1805#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1806// Reverse operations
1807ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1808 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1809ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1810 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1811ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1812 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1813ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1814 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1815
1816ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1817 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1818ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1819 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1820ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1821 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1822ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1823 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1824
1825ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1826 0) // __kmpc_atomic_fixed4_sub_rev_fp
1827ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1828 0) // __kmpc_atomic_fixed4u_sub_rev_fp
1829ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1830 0) // __kmpc_atomic_fixed4_div_rev_fp
1831ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1832 0) // __kmpc_atomic_fixed4u_div_rev_fp
1833
1834ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1835 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1836ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1837 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1838ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1839 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1840ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1841 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1842
1843ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1844 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1845ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1846 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1847
1848ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1849 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1850ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1851 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1852
1853ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1854 1) // __kmpc_atomic_float10_sub_rev_fp
1855ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1856 1) // __kmpc_atomic_float10_div_rev_fp
1857#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1858
1859#endif // KMP_HAVE_QUAD
1860
1861#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1862// ------------------------------------------------------------------------
1863// X86 or X86_64: no alignment problems ====================================
1864#if USE_CMPXCHG_FIX
1865// workaround for C78287 (complex(kind=4) data type)
1866#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1867 LCK_ID, MASK, GOMP_FLAG) \
1868 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1869 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1870 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1871 }
1872// end of the second part of the workaround for C78287
1873#else
1874#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1875 LCK_ID, MASK, GOMP_FLAG) \
1876 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1877 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1878 OP_CMPXCHG(TYPE, BITS, OP) \
1879 }
1880#endif // USE_CMPXCHG_FIX
1881#else
1882// ------------------------------------------------------------------------
1883// Code for other architectures that don't handle unaligned accesses.
1884#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885 LCK_ID, MASK, GOMP_FLAG) \
1886 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1889 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1890 } else { \
1891 KMP_CHECK_GTID; \
1892 OP_UPDATE_CRITICAL(TYPE, OP, \
1893 LCK_ID) /* unaligned address - use critical */ \
1894 } \
1895 }
1896#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
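// Illustrative sketch (not part of the generated code): in the cmplx4
// instantiations below MASK == 7, so "0x##MASK" pastes to 0x7 and the
// non-x86 expansion behaves roughly as
//   if (!((kmp_uintptr_t)lhs & 0x7)) {
//     /* 8-byte aligned address: lock-free 64-bit cmpxchg loop */
//   } else {
//     /* unaligned address: serialize through the 8c critical lock */
//   }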
1897
1898ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1899 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1900ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1901 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1902ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1903 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1904ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1905 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1906
1907// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1908#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1909
1910// ------------------------------------------------------------------------
1911// Atomic READ routines
1912
1913// ------------------------------------------------------------------------
1914// Beginning of a definition (provides name, parameters, debug trace)
1915// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
1916// unsigned fixed types)
1917// OP_ID - operation identifier (add, sub, mul, ...)
1918// TYPE - operands' type
1919#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1920 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1921 TYPE *loc) { \
1922 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1923 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1924
1925// ------------------------------------------------------------------------
1926// Operation on *lhs, rhs using "compare_and_store_ret" routine
1927// TYPE - operands' type
1928// BITS - size in bits, used to distinguish low level calls
1929// OP - operator
1930// Note: temp_val introduced in order to force the compiler to read
1931// *lhs only once (w/o it the compiler reads *lhs twice)
1932// TODO: check if it is still necessary
1933// Return the old value regardless of the result of the "compare & swap" operation
1934#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1935 { \
1936 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1937 union f_i_union { \
1938 TYPE f_val; \
1939 kmp_int##BITS i_val; \
1940 }; \
1941 union f_i_union old_value; \
1942 temp_val = *loc; \
1943 old_value.f_val = temp_val; \
1944 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1945 (kmp_int##BITS *)loc, \
1946 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1947 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1948 new_value = old_value.f_val; \
1949 return new_value; \
1950 }
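// Illustrative sketch: the union type-puns TYPE onto the same-size integer,
// and the compare-and-store is issued with identical "expected" and "new"
// values, so *loc is never modified - the call simply returns the current
// contents of *loc atomically. For TYPE == kmp_real32, BITS == 32 this is
// roughly:
//   union { kmp_real32 f_val; kmp_int32 i_val; } old_value;
//   old_value.f_val = *loc;
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET32(
//       (kmp_int32 *)loc, old_value.i_val, old_value.i_val);
//   return old_value.f_val; // atomic snapshot of *loc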
1951
1952// -------------------------------------------------------------------------
1953// Operation on *lhs, rhs bound by critical section
1954// OP - operator (it's supposed to contain an assignment)
1955// LCK_ID - lock identifier
1956// Note: don't check gtid as it should always be valid
1957// 1-, 2-byte operands: a valid gtid is expected; other types: check it before this macro
1958#define OP_CRITICAL_READ(OP, LCK_ID) \
1959 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1960 \
1961 new_value = (*loc); \
1962 \
1963 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1964
1965// -------------------------------------------------------------------------
1966#ifdef KMP_GOMP_COMPAT
1967#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1968 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1969 KMP_CHECK_GTID; \
1970 OP_CRITICAL_READ(OP, 0); \
1971 return new_value; \
1972 }
1973#else
1974#define OP_GOMP_CRITICAL_READ(OP, FLAG)
1975#endif /* KMP_GOMP_COMPAT */
1976
1977// -------------------------------------------------------------------------
1978#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1979 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1980 TYPE new_value; \
1981 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1982 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1983 return new_value; \
1984 }
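// Illustrative sketch: ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0)
// below expands roughly to
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     return KMP_TEST_THEN_ADD32(loc, +0); // fetch-and-add of 0 ==
//   }                                      // an atomic load of *loc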
1985// -------------------------------------------------------------------------
1986#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1987 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1988 TYPE new_value; \
1989 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1990 OP_CMPXCHG_READ(TYPE, BITS, OP) \
1991 }
1992// ------------------------------------------------------------------------
1993// Routines for Extended types: long double, _Quad, complex flavours (use
1994// critical section)
1995// TYPE_ID, OP_ID, TYPE - detailed above
1996// OP - operator
1997// LCK_ID - lock identifier, used to possibly distinguish lock variable
1998#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1999 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2000 TYPE new_value; \
2001 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2002 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2003 return new_value; \
2004 }
2005
2006// ------------------------------------------------------------------------
2007// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2008// value doesn't work.
2009// Let's return the read value through the additional parameter.
2010#if (KMP_OS_WINDOWS)
2011
2012#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2013 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2014 \
2015 (*out) = (*loc); \
2016 \
2017 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2018// ------------------------------------------------------------------------
2019#ifdef KMP_GOMP_COMPAT
2020#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2021 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2022 KMP_CHECK_GTID; \
2023 OP_CRITICAL_READ_WRK(OP, 0); \
2024 }
2025#else
2026#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2027#endif /* KMP_GOMP_COMPAT */
2028// ------------------------------------------------------------------------
2029#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2030 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2031 TYPE *loc) { \
2032 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2033 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2034
2035// ------------------------------------------------------------------------
2036#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2037 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2038 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2039 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2040 }
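// Illustrative sketch: ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, ...)
// below therefore generates a void routine that passes the value back
// through "out":
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);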
2041
2042#endif // KMP_OS_WINDOWS
2043
2044// ------------------------------------------------------------------------
2045// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2046ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2047ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2049ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2050 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2051ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2052 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2053
2054// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2055ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2056 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2057ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2058 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2059
2060ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2061 1) // __kmpc_atomic_float10_rd
2062#if KMP_HAVE_QUAD
2063ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2064 1) // __kmpc_atomic_float16_rd
2065#endif // KMP_HAVE_QUAD
2066
2067// Fix for CQ220361 on Windows* OS
2068#if (KMP_OS_WINDOWS)
2069ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2070 1) // __kmpc_atomic_cmplx4_rd
2071#else
2072ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2073 1) // __kmpc_atomic_cmplx4_rd
2074#endif // (KMP_OS_WINDOWS)
2075ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2076 1) // __kmpc_atomic_cmplx8_rd
2077ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2078 1) // __kmpc_atomic_cmplx10_rd
2079#if KMP_HAVE_QUAD
2080ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2081 1) // __kmpc_atomic_cmplx16_rd
2082#if (KMP_ARCH_X86)
2083ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2084 1) // __kmpc_atomic_float16_a16_rd
2085ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2086 1) // __kmpc_atomic_cmplx16_a16_rd
2087#endif // (KMP_ARCH_X86)
2088#endif // KMP_HAVE_QUAD
2089
2090// ------------------------------------------------------------------------
2091// Atomic WRITE routines
2092
2093#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2094 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2095 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2096 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2097 }
2098// ------------------------------------------------------------------------
2099#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2100 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2101 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2102 KMP_XCHG_REAL##BITS(lhs, rhs); \
2103 }
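// Illustrative sketch (assuming ATOMIC_BEGIN, defined earlier in this file,
// supplies the usual (ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs)
// parameters): ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, ...) is roughly
//   void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                kmp_int32 rhs) {
//     KMP_XCHG_FIXED32(lhs, rhs); // atomic exchange; old value is discarded
//   }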
2104
2105// ------------------------------------------------------------------------
2106// Operation on *lhs, rhs using "compare_and_store" routine
2107// TYPE - operands' type
2108// BITS - size in bits, used to distinguish low level calls
2109// OP - operator
2110// Note: temp_val introduced in order to force the compiler to read
2111// *lhs only once (w/o it the compiler reads *lhs twice)
2112#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2113 { \
2114 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2115 TYPE old_value, new_value; \
2116 temp_val = *lhs; \
2117 old_value = temp_val; \
2118 new_value = rhs; \
2119 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2120 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2121 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2122 temp_val = *lhs; \
2123 old_value = temp_val; \
2124 new_value = rhs; \
2125 } \
2126 }
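// Design note: 32-bit x86 has no 64-bit atomic exchange instruction, which is
// why the fixed8/float8 write entries below select this 8-byte cmpxchg retry
// loop (ATOMIC_CMPXCHG_WR) when KMP_ARCH_X86 is set, and the cheaper
// ATOMIC_XCHG_WR / ATOMIC_XCHG_FLOAT_WR otherwise.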
2127
2128// -------------------------------------------------------------------------
2129#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2130 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2131 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2132 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2133 }
2134
2135// ------------------------------------------------------------------------
2136// Routines for Extended types: long double, _Quad, complex flavours (use
2137// critical section)
2138// TYPE_ID, OP_ID, TYPE - detailed above
2139// OP - operator
2140// LCK_ID - lock identifier, used to possibly distinguish lock variable
2141#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2142 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2143 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2144 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2145 }
2146// -------------------------------------------------------------------------
2147
2148ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2149 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2150ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2151 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2152ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2153 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2154#if (KMP_ARCH_X86)
2155ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2156 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2157#else
2158ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2159 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2160#endif // (KMP_ARCH_X86)
2161
2162ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2163 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2164#if (KMP_ARCH_X86)
2165ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2166 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2167#else
2168ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2169 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2170#endif // (KMP_ARCH_X86)
2171
2172ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2173 1) // __kmpc_atomic_float10_wr
2174#if KMP_HAVE_QUAD
2175ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2176 1) // __kmpc_atomic_float16_wr
2177#endif // KMP_HAVE_QUAD
2178ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2179ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2180 1) // __kmpc_atomic_cmplx8_wr
2181ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2182 1) // __kmpc_atomic_cmplx10_wr
2183#if KMP_HAVE_QUAD
2184ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2185 1) // __kmpc_atomic_cmplx16_wr
2186#if (KMP_ARCH_X86)
2187ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2188 1) // __kmpc_atomic_float16_a16_wr
2189ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2190 1) // __kmpc_atomic_cmplx16_a16_wr
2191#endif // (KMP_ARCH_X86)
2192#endif // KMP_HAVE_QUAD
2193
2194// ------------------------------------------------------------------------
2195// Atomic CAPTURE routines
2196
2197// Beginning of a definition (provides name, parameters, debug trace)
2198// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2199// unsigned fixed types)
2200// OP_ID - operation identifier (add, sub, mul, ...)
2201// TYPE - operands' type
2202#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2203 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2204 TYPE *lhs, TYPE rhs, int flag) { \
2205 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2206 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2207
2208// -------------------------------------------------------------------------
2209// Operation on *lhs, rhs bound by critical section
2210// OP - operator (it's supposed to contain an assignment)
2211// LCK_ID - lock identifier
2212// Note: don't check gtid as it should always be valid
2213// 1-, 2-byte operands: a valid gtid is expected; other types: check it before this macro
2214#define OP_CRITICAL_CPT(OP, LCK_ID) \
2215 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2216 \
2217 if (flag) { \
2218 (*lhs) OP rhs; \
2219 new_value = (*lhs); \
2220 } else { \
2221 new_value = (*lhs); \
2222 (*lhs) OP rhs; \
2223 } \
2224 \
2225 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2226 return new_value;
2227
2228#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2229 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2230 \
2231 if (flag) { \
2232 (*lhs) = (TYPE)((*lhs)OP rhs); \
2233 new_value = (*lhs); \
2234 } else { \
2235 new_value = (*lhs); \
2236 (*lhs) = (TYPE)((*lhs)OP rhs); \
2237 } \
2238 \
2239 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2240 return new_value;
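// Illustrative sketch of the "flag" semantics (all under the same lock),
// e.g. for long double addition:
//   if (flag) { *lhs = *lhs + rhs; v = *lhs; } // capture the NEW value,
//                                              // i.e. { x op= expr; v = x; }
//   else      { v = *lhs; *lhs = *lhs + rhs; } // capture the OLD value,
//                                              // i.e. { v = x; x op= expr; }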
2241
2242// ------------------------------------------------------------------------
2243#ifdef KMP_GOMP_COMPAT
2244#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2245 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2246 KMP_CHECK_GTID; \
2247 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2248 }
2249#else
2250#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2251#endif /* KMP_GOMP_COMPAT */
2252
2253// ------------------------------------------------------------------------
2254// Operation on *lhs, rhs using "compare_and_store" routine
2255// TYPE - operands' type
2256// BITS - size in bits, used to distinguish low level calls
2257// OP - operator
2258// Note: temp_val introduced in order to force the compiler to read
2259// *lhs only once (w/o it the compiler reads *lhs twice)
2260#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2261 { \
2262 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2263 TYPE old_value, new_value; \
2264 temp_val = *lhs; \
2265 old_value = temp_val; \
2266 new_value = (TYPE)(old_value OP rhs); \
2267 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2268 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2269 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2270 temp_val = *lhs; \
2271 old_value = temp_val; \
2272 new_value = (TYPE)(old_value OP rhs); \
2273 } \
2274 if (flag) { \
2275 return new_value; \
2276 } else \
2277 return old_value; \
2278 }
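// Illustrative sketch: ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
// ...) below generates roughly
//   kmp_real32 __kmpc_atomic_float4_add_cpt(ident_t *id_ref, int gtid,
//                                           kmp_real32 *lhs, kmp_real32 rhs,
//                                           int flag) {
//     kmp_real32 old_value, new_value;
//     do { // retry until no other thread modified *lhs in between
//       old_value = *lhs;
//       new_value = old_value + rhs;
//     } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                           *(kmp_int32 *)&old_value,
//                                           *(kmp_int32 *)&new_value));
//     return flag ? new_value : old_value;
//   }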
2279
2280// -------------------------------------------------------------------------
2281#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2282 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2283 TYPE new_value; \
2284 (void)new_value; \
2285 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2286 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2287 }
2288
2289// -------------------------------------------------------------------------
2290#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2291 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2292 TYPE old_value, new_value; \
2293 (void)new_value; \
2294 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2295 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2296 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2297 if (flag) { \
2298 return old_value OP rhs; \
2299 } else \
2300 return old_value; \
2301 }
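// Illustrative sketch: thanks to the unary-OP trick above,
// ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0) does roughly
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // subtraction via fetch-add
//   return flag ? old_value - rhs : old_value;  // new vs. old value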
2302// -------------------------------------------------------------------------
2303
2304ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2305 0) // __kmpc_atomic_fixed4_add_cpt
2306ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2307 0) // __kmpc_atomic_fixed4_sub_cpt
2308ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2309 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2310ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2311 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2312
2313ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2314 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2315ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2316 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2317ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2318 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2319ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2320 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2321
2322// ------------------------------------------------------------------------
2323// Entries definition for integer operands
2324// TYPE_ID - operands' type and size (fixed4, float4)
2325// OP_ID - operation identifier (add, sub, mul, ...)
2326// TYPE - operand type
2327// BITS - size in bits, used to distinguish low level calls
2328// OP - operator (used in critical section)
2329// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2330// ------------------------------------------------------------------------
2331// Routines for ATOMIC integer operands, other operators
2332// ------------------------------------------------------------------------
2333// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2334ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2335 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2336ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2337 0) // __kmpc_atomic_fixed1_andb_cpt
2338ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2339 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2340ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2341 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2342ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2343 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2344ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2345 0) // __kmpc_atomic_fixed1_orb_cpt
2346ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2347 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2348ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2349 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2350ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2351 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2352ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2353 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2354ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2355 0) // __kmpc_atomic_fixed1_xor_cpt
2356ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2357 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2358ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2359 0) // __kmpc_atomic_fixed2_andb_cpt
2360ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2361 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2362ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2363 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2364ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2365 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2366ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2367 0) // __kmpc_atomic_fixed2_orb_cpt
2368ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2369 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2370ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2371 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2372ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2373 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2374ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2375 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2376ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2377 0) // __kmpc_atomic_fixed2_xor_cpt
2378ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2379 0) // __kmpc_atomic_fixed4_andb_cpt
2380ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2381 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2382ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2383 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2384ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2385 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2386ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2387 0) // __kmpc_atomic_fixed4_orb_cpt
2388ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2389 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2390ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2391 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2392ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2393 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2394ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2395 0) // __kmpc_atomic_fixed4_xor_cpt
2396ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2397 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2398ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2399 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2400ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2401 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2402ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2403 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2404ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2405 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2406ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2407 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2408ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2409 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2410ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2411 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2412ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2413 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2414ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2415 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2416ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2417 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2418ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2419 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2420ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2421 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2422// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2423
2424// CAPTURE routines for mixed types RHS=float16
2425#if KMP_HAVE_QUAD
2426
2427// Beginning of a definition (provides name, parameters, debug trace)
2428// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2429// unsigned fixed types)
2430// OP_ID - operation identifier (add, sub, mul, ...)
2431// TYPE - operands' type
2432#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2433 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2434 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2435 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2436 KA_TRACE(100, \
2437 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2438 gtid));
2439
2440// -------------------------------------------------------------------------
2441#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2442 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2443 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2444 TYPE new_value; \
2445 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2446 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2447 }
2448
2449// -------------------------------------------------------------------------
2450#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2451 LCK_ID, GOMP_FLAG) \
2452 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2453 TYPE new_value; \
2454 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2455 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2456 }
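// Note: in these mixed routines lhs has type TYPE while rhs is a _Quad, so
// the update "new_value = (TYPE)(old_value OP rhs)" inside OP_CMPXCHG_CPT
// promotes the computation to _Quad and converts the result back to TYPE;
// e.g. __kmpc_atomic_fixed4_add_cpt_fp effectively computes
//   new_value = (kmp_int32)(old_value + rhs); // arithmetic in _Quad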
2457
2458ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2459 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2460ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2461 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2462ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2463 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2464ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2465 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2466ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2467 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2468ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2469 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2470ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2471 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2472ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2473 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2474
2475ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2476 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2477ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2478 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2479ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2480 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2481ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2482 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2483ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2484 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2485ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2486 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2487ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2488 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2489ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2490 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2491
2492ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2493 0) // __kmpc_atomic_fixed4_add_cpt_fp
2494ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2495 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2496ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2497 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2498ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2499 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2500ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2501 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2502ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2503 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2504ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2505 0) // __kmpc_atomic_fixed4_div_cpt_fp
2506ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2507 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2508
2509ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2510 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2511ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2512 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2513ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2514 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2515ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2516 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2517ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2518 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2519ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2520 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2521ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2522 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2523ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2524 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2525
2526ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2527 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2528ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2529 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2530ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2531 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2532ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2533 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2534
2535ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2536 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2537ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2538 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2539ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2540 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2541ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2542 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2543
2544ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2545 1) // __kmpc_atomic_float10_add_cpt_fp
2546ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2547 1) // __kmpc_atomic_float10_sub_cpt_fp
2548ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2549 1) // __kmpc_atomic_float10_mul_cpt_fp
2550ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2551 1) // __kmpc_atomic_float10_div_cpt_fp
2552
2553#endif // KMP_HAVE_QUAD
2554
2555// ------------------------------------------------------------------------
2556// Routines for C/C++ Reduction operators && and ||
2557
2558// -------------------------------------------------------------------------
2559// Operation on *lhs, rhs bound by critical section
2560// OP - operator (it's supposed to contain an assignment)
2561// LCK_ID - lock identifier
2562// Note: don't check gtid as it should always be valid
2563// 1-, 2-byte operands: a valid gtid is expected; other types: check it before this macro
2564#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2565 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2566 \
2567 if (flag) { \
2568 new_value OP rhs; \
2569 (*lhs) = new_value; \
2570 } else { \
2571 new_value = (*lhs); \
2572 (*lhs) OP rhs; \
2573 } \
2574 \
2575 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2576
2577// ------------------------------------------------------------------------
2578#ifdef KMP_GOMP_COMPAT
2579#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2580 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2581 KMP_CHECK_GTID; \
2582 OP_CRITICAL_L_CPT(OP, 0); \
2583 return new_value; \
2584 }
2585#else
2586#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2587#endif /* KMP_GOMP_COMPAT */
2588
2589// ------------------------------------------------------------------------
2590// Need separate macros for && and || because C/C++ has no compound assignment (&&=, ||=)
2591#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2592 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2593 TYPE new_value; \
2594 (void)new_value; \
2595 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2596 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2597 }
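// Illustrative sketch of the token-pasting trick above: for fixed4 andl_cpt
// the GOMP path passes OP as "= *lhs &&", so the statement
// "new_value OP rhs;" inside OP_CRITICAL_L_CPT expands to
//   new_value = *lhs && rhs; // the assignment form that && itself lacks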
2598
2599ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2600 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2601ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2602 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2603ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2604 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2605ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2606 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2607ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2608 0) // __kmpc_atomic_fixed4_andl_cpt
2609ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2610 0) // __kmpc_atomic_fixed4_orl_cpt
2611ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2612 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2613ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2614 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2615
2616// -------------------------------------------------------------------------
2617// Routines for Fortran operators that have no C counterpart:
2618// MAX, MIN, .EQV., .NEQV.
2619// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2620// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2621
2622// -------------------------------------------------------------------------
2623// MIN and MAX need separate macros
2624// OP - comparison operator used to check whether any action is needed
2625#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2626 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2627 \
2628 if (*lhs OP rhs) { /* still need actions? */ \
2629 old_value = *lhs; \
2630 *lhs = rhs; \
2631 if (flag) \
2632 new_value = rhs; \
2633 else \
2634 new_value = old_value; \
2635 } else { \
2636 new_value = *lhs; \
2637 } \
2638 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2639 return new_value;
2640
2641// -------------------------------------------------------------------------
2642#ifdef KMP_GOMP_COMPAT
2643#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2644 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2645 KMP_CHECK_GTID; \
2646 MIN_MAX_CRITSECT_CPT(OP, 0); \
2647 }
2648#else
2649#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2650#endif /* KMP_GOMP_COMPAT */
2651
2652// -------------------------------------------------------------------------
2653#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2654 { \
2655 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2656 /*TYPE old_value; */ \
2657 temp_val = *lhs; \
2658 old_value = temp_val; \
2659 while (old_value OP rhs && /* still need actions? */ \
2660 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2661 (kmp_int##BITS *)lhs, \
2662 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2663 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2664 temp_val = *lhs; \
2665 old_value = temp_val; \
2666 } \
2667 if (flag) \
2668 return rhs; \
2669 else \
2670 return old_value; \
2671 }
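// Illustrative note: for max_cpt OP is '<', so the loop above keeps trying to
// install rhs only while "old_value < rhs" still holds; if another thread
// stores a value >= rhs first, the condition fails and *lhs is left as is.
// The captured result is rhs (the new value) when flag is set, else old_value.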
2672
2673// -------------------------------------------------------------------------
2674// 1-byte, 2-byte operands - use critical section
2675#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2676 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2677 TYPE new_value, old_value; \
2678 if (*lhs OP rhs) { /* need actions? */ \
2679 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2680 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2681 } \
2682 return *lhs; \
2683 }
2684
2685#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2686 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2687 TYPE new_value, old_value; \
2688 (void)new_value; \
2689 if (*lhs OP rhs) { \
2690 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2691 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2692 } \
2693 return *lhs; \
2694 }
2695
2696MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2697 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2698MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2699 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2700MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2701 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2702MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2703 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2704MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2705 0) // __kmpc_atomic_fixed4_max_cpt
2706MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2707 0) // __kmpc_atomic_fixed4_min_cpt
2708MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2709 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2710MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2711 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2712MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2713 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2714MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2715 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2716MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2717 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2718MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2719 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2720#if KMP_HAVE_QUAD
2721MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2722 1) // __kmpc_atomic_float16_max_cpt
2723MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2724 1) // __kmpc_atomic_float16_min_cpt
2725#if (KMP_ARCH_X86)
2726MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2727 1) // __kmpc_atomic_float16_max_a16_cpt
2728MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2729 1) // __kmpc_atomic_float16_min_a16_cpt
2730#endif // (KMP_ARCH_X86)
2731#endif // KMP_HAVE_QUAD
2732
2733// ------------------------------------------------------------------------
2734#ifdef KMP_GOMP_COMPAT
2735#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2736 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2737 KMP_CHECK_GTID; \
2738 OP_CRITICAL_CPT(OP, 0); \
2739 }
2740#else
2741#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2742#endif /* KMP_GOMP_COMPAT */
2743// ------------------------------------------------------------------------
2744#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2745 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2746 TYPE new_value; \
2747 (void)new_value; \
2748 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2749 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2750 }
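// Illustrative note: .EQV. (bitwise equivalence) has no C operator, so it is
// computed as XOR with the complemented operand: x .EQV. rhs == x ^ ~rhs.
// Passing OP as "^~" makes OP_CMPXCHG_CPT compute roughly
//   new_value = (TYPE)(old_value ^ ~rhs);
// while the GOMP critical path's "^= (TYPE) ~" forms "(*lhs) ^= (TYPE)~rhs;".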
2751
2752// ------------------------------------------------------------------------
2753
2754ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2755 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2756ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2757 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2758ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2759 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2760ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2761 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2762ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2763 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2764ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2765 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2766ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2767 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2768ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2769 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2770
2771// ------------------------------------------------------------------------
2772// Routines for Extended types: long double, _Quad, complex flavours (use
2773// critical section)
2774// TYPE_ID, OP_ID, TYPE - detailed above
2775// OP - operator
2776// LCK_ID - lock identifier, used to possibly distinguish lock variable
2777#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2778 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2779 TYPE new_value; \
2780 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2781 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2782 }
2783
2784// ------------------------------------------------------------------------
2785// Workaround for cmplx4. Regular routines with return value don't work
2786// on Win_32e. Let's return captured values through the additional parameter.
2787#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2788 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2789 \
2790 if (flag) { \
2791 (*lhs) OP rhs; \
2792 (*out) = (*lhs); \
2793 } else { \
2794 (*out) = (*lhs); \
2795 (*lhs) OP rhs; \
2796 } \
2797 \
2798 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2799 return;
2800// ------------------------------------------------------------------------
2801
2802#ifdef KMP_GOMP_COMPAT
2803#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2804 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2805 KMP_CHECK_GTID; \
2806 OP_CRITICAL_CPT_WRK(OP## =, 0); \
2807 }
2808#else
2809#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2810#endif /* KMP_GOMP_COMPAT */
2811// ------------------------------------------------------------------------
2812
2813#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2814 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2815 TYPE rhs, TYPE *out, int flag) { \
2816 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2817 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2818// ------------------------------------------------------------------------
2819
2820#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2821 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2822 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2823 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2824 }
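// Illustrative sketch: ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32,
// +, 8c, 1) below therefore generates a void routine,
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
// with the captured value stored through "out" instead of being returned.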
2825// The end of workaround for cmplx4
2826
2827/* ------------------------------------------------------------------------- */
2828// routines for long double type
2829ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2830 1) // __kmpc_atomic_float10_add_cpt
2831ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2832 1) // __kmpc_atomic_float10_sub_cpt
2833ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2834 1) // __kmpc_atomic_float10_mul_cpt
2835ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2836 1) // __kmpc_atomic_float10_div_cpt
2837#if KMP_HAVE_QUAD
2838// routines for _Quad type
2839ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2840 1) // __kmpc_atomic_float16_add_cpt
2841ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2842 1) // __kmpc_atomic_float16_sub_cpt
2843ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2844 1) // __kmpc_atomic_float16_mul_cpt
2845ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2846 1) // __kmpc_atomic_float16_div_cpt
2847#if (KMP_ARCH_X86)
2848ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2849 1) // __kmpc_atomic_float16_add_a16_cpt
2850ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2851 1) // __kmpc_atomic_float16_sub_a16_cpt
2852ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2853 1) // __kmpc_atomic_float16_mul_a16_cpt
2854ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2855 1) // __kmpc_atomic_float16_div_a16_cpt
2856#endif // (KMP_ARCH_X86)
2857#endif // KMP_HAVE_QUAD
2858
2859// routines for complex types
2860
2861// cmplx4 routines to return void
2862ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2863 1) // __kmpc_atomic_cmplx4_add_cpt
2864ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2865 1) // __kmpc_atomic_cmplx4_sub_cpt
2866ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2867 1) // __kmpc_atomic_cmplx4_mul_cpt
2868ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2869 1) // __kmpc_atomic_cmplx4_div_cpt
2870
2871ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2872 1) // __kmpc_atomic_cmplx8_add_cpt
2873ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2874 1) // __kmpc_atomic_cmplx8_sub_cpt
2875ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2876 1) // __kmpc_atomic_cmplx8_mul_cpt
2877ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2878 1) // __kmpc_atomic_cmplx8_div_cpt
2879ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2880 1) // __kmpc_atomic_cmplx10_add_cpt
2881ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2882 1) // __kmpc_atomic_cmplx10_sub_cpt
2883ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2884 1) // __kmpc_atomic_cmplx10_mul_cpt
2885ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2886 1) // __kmpc_atomic_cmplx10_div_cpt
2887#if KMP_HAVE_QUAD
2888ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2889 1) // __kmpc_atomic_cmplx16_add_cpt
2890ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2891 1) // __kmpc_atomic_cmplx16_sub_cpt
2892ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2893 1) // __kmpc_atomic_cmplx16_mul_cpt
2894ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2895 1) // __kmpc_atomic_cmplx16_div_cpt
2896#if (KMP_ARCH_X86)
2897ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2898 1) // __kmpc_atomic_cmplx16_add_a16_cpt
2899ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2900 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2901ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2902 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2903ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2904 1) // __kmpc_atomic_cmplx16_div_a16_cpt
2905#endif // (KMP_ARCH_X86)
2906#endif // KMP_HAVE_QUAD
2907
2908// OpenMP 4.0 capture forms: v = x = expr binop x; { v = x; x = expr binop x; }
2909// and { x = expr binop x; v = x; } for non-commutative operations.
2910// Supported only on IA-32 architecture and Intel(R) 64
2911
2912// -------------------------------------------------------------------------
2913// Operation on *lhs, rhs bound by critical section
2914// OP - operator (it's supposed to contain an assignment)
2915// LCK_ID - lock identifier
2916// Note: don't check gtid as it should always be valid
2917// 1-, 2-byte operands: a valid gtid is expected; other types: check it before this macro
2918#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2919 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2920 \
2921 if (flag) { \
2922 /*temp_val = (*lhs);*/ \
2923 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2924 new_value = (*lhs); \
2925 } else { \
2926 new_value = (*lhs); \
2927 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2928 } \
2929 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2930 return new_value;
2931
2932// ------------------------------------------------------------------------
2933#ifdef KMP_GOMP_COMPAT
2934#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2935 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2936 KMP_CHECK_GTID; \
2937 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2938 }
2939#else
2940#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2941#endif /* KMP_GOMP_COMPAT */
2942
2943// ------------------------------------------------------------------------
2944// Operation on *lhs, rhs using "compare_and_store" routine
2945// TYPE - operands' type
2946// BITS - size in bits, used to distinguish low level calls
2947// OP - operator
2948// Note: temp_val introduced in order to force the compiler to read
2949// *lhs only once (w/o it the compiler reads *lhs twice)
2950#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2951 { \
2952 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2953 TYPE old_value, new_value; \
2954 temp_val = *lhs; \
2955 old_value = temp_val; \
2956 new_value = (TYPE)(rhs OP old_value); \
2957 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2958 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2959 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2960 temp_val = *lhs; \
2961 old_value = temp_val; \
2962 new_value = (TYPE)(rhs OP old_value); \
2963 } \
2964 if (flag) { \
2965 return new_value; \
2966 } else \
2967 return old_value; \
2968 }
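// Illustrative sketch: "reverse" swaps the operand order, so e.g.
// __kmpc_atomic_float4_sub_cpt_rev performs x = rhs - x, i.e. roughly
//   new_value = (kmp_real32)(rhs - old_value);
// inside the same cmpxchg retry loop, returning new_value or old_value
// according to flag.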
2969
2970// -------------------------------------------------------------------------
2971#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2972 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2973 TYPE new_value; \
2974 (void)new_value; \
2975 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2976 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2977 }
2978
2979ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2980 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2981ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2982 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2983ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2985ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2986 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2987ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2988 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2989ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2990 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2991ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2992 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2993ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2994 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2995ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2996 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2997ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2998 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2999ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3000 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3001ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3002 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3003ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3004 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3005ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3006 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3007ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3008 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3009ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3010 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3011ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3012 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3013ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3014 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3015ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3016 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3017ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3018 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3019ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3020 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3021ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3022 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3023ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3024 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3025ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3026 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3027ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3028 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3029ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3030 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3031ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3032 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3033ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3034 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3035// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
3036
3037// ------------------------------------------------------------------------
3038// Routines for Extended types: long double, _Quad, complex flavours (use
3039// critical section)
3040// TYPE_ID, OP_ID, TYPE - detailed above
3041// OP - operator
3042// LCK_ID - lock identifier, used to possibly distinguish lock variable
3043#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3044 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3045 TYPE new_value; \
3046 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3047 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3048 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3049 }
3050
3051/* ------------------------------------------------------------------------- */
3052// routines for long double type
3053ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3054 1) // __kmpc_atomic_float10_sub_cpt_rev
3055ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3056 1) // __kmpc_atomic_float10_div_cpt_rev
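// Illustrative sketch (not part of the runtime): the float10 instantiations
// above expand to a plain lock-guarded update. Assuming ATOMIC_BEGIN_CPT
// emits the usual (ident_t *, gtid, lhs, rhs, flag) capture signature and
// OP_CRITICAL_CPT_REV mirrors the flag handling of OP_CRITICAL_CPT_REV_WRK
// defined below, sub_cpt_rev behaves roughly like:
#if 0
long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid,
                                              long double *lhs,
                                              long double rhs, int flag) {
  long double new_value;
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
  if (flag) { // capture the updated value: {x = rhs - x; v = x;}
    (*lhs) = rhs - (*lhs);
    new_value = (*lhs);
  } else { // capture the old value: {v = x; x = rhs - x;}
    new_value = (*lhs);
    (*lhs) = rhs - (*lhs);
  }
  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
  return new_value;
}
#endif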
3057#if KMP_HAVE_QUAD
3058// routines for _Quad type
3059ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3060 1) // __kmpc_atomic_float16_sub_cpt_rev
3061ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3062 1) // __kmpc_atomic_float16_div_cpt_rev
3063#if (KMP_ARCH_X86)
3064ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3065 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3066ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3067 1) // __kmpc_atomic_float16_div_a16_cpt_rev
3068#endif // (KMP_ARCH_X86)
3069#endif // KMP_HAVE_QUAD
3070
3071// routines for complex types
3072
3073// ------------------------------------------------------------------------
3074// Workaround for cmplx4. Regular routines with a return value don't work
3075// on Win_32e, so the captured value is returned through an additional parameter.
3076#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3077 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3078 \
3079 if (flag) { \
3080 (*lhs) = (rhs)OP(*lhs); \
3081 (*out) = (*lhs); \
3082 } else { \
3083 (*out) = (*lhs); \
3084 (*lhs) = (rhs)OP(*lhs); \
3085 } \
3086 \
3087 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3088 return;
3089// ------------------------------------------------------------------------
3090
3091#ifdef KMP_GOMP_COMPAT
3092#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3093 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3094 KMP_CHECK_GTID; \
3095 OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3096 }
3097#else
3098#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3099#endif /* KMP_GOMP_COMPAT */
3100// ------------------------------------------------------------------------
3101
3102#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3103 GOMP_FLAG) \
3104 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3105 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3106 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3107 }
3108// End of the workaround for cmplx4
3109
3110// !!! TODO: check if we need to return void for cmplx4 routines
3111// cmplx4 routines to return void
3112ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3113 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3114ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3115 1) // __kmpc_atomic_cmplx4_div_cpt_rev
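// Illustrative sketch (hypothetical call site, not part of the runtime): how
// a compiler could use the cmplx4 workaround routine above, assuming the
// (lhs, rhs, out, flag) parameter order of the _WRK-style declarations in
// kmp_atomic.h; loc, gtid, x, e and v are made-up names.
#if 0
static void example_cmplx4_capture(ident_t *loc, int gtid, kmp_cmplx32 *x,
                                   kmp_cmplx32 e, kmp_cmplx32 *v) {
  // Lowering of: #pragma omp atomic capture { *v = *x; *x = e / *x; }
  // flag == 0 selects capture-before (old value), per the macro above.
  __kmpc_atomic_cmplx4_div_cpt_rev(loc, gtid, x, e, v, /*flag=*/0);
}
#endif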
3116
3117ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3118 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3119ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3120 1) // __kmpc_atomic_cmplx8_div_cpt_rev
3121ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3122 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3123ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3124 1) // __kmpc_atomic_cmplx10_div_cpt_rev
3125#if KMP_HAVE_QUAD
3126ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3127 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3128ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3129 1) // __kmpc_atomic_cmplx16_div_cpt_rev
3130#if (KMP_ARCH_X86)
3131ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3132 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3133ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3134 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3135#endif // (KMP_ARCH_X86)
3136#endif // KMP_HAVE_QUAD
3137
3138// Capture reverse for mixed type: RHS=float16
3139#if KMP_HAVE_QUAD
3140
3141// Beginning of a definition (provides name, parameters, debug trace)
3142// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3143// fixed)
3144// OP_ID - operation identifier (add, sub, mul, ...)
3145// TYPE - operands' type
3146// -------------------------------------------------------------------------
3147#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3148 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3149 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3150 TYPE new_value; \
3151 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3152 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3153 }
3154
3155// -------------------------------------------------------------------------
3156#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3157 LCK_ID, GOMP_FLAG) \
3158 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3159 TYPE new_value; \
3160 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3161 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3162 }
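// Illustrative sketch (not part of the runtime): semantics of the mixed-type
// instantiations below, e.g. __kmpc_atomic_fixed4_sub_cpt_rev_fp. The RHS is
// a _Quad, the arithmetic happens in _Quad, and the result is converted back
// to the LHS type inside the CAS retry loop; the exact signature emitted by
// ATOMIC_BEGIN_CPT_MIX is an assumption here.
#if 0
kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
                                              kmp_int32 *lhs, _Quad rhs,
                                              int flag) {
  kmp_int32 old_value, new_value;
  do {
    old_value = *lhs;
    new_value = (kmp_int32)(rhs - (_Quad)old_value); // compute in _Quad
  } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value,
                                        new_value));
  return flag ? new_value : old_value; // captured value depends on flag
}
#endif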
3163
3164ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3165 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3166ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3167 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3168ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3169 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3170ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3171 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3172
3173ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3174 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3175ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3176 1,
3177 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3178ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3179 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3180ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3181 1,
3182 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3183
3184ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3185 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3186ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3187 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3188ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3189 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3190ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3191 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3192
3193ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3194 7,
3195 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3196ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3197 8i, 7,
3198 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3199ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3200 7,
3201 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3202ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3203 8i, 7,
3204 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3205
3206ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3207 4r, 3,
3208 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3209ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3210 4r, 3,
3211 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3212
3213ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3214 8r, 7,
3215 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3216ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3217 8r, 7,
3218 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3219
3220ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3221 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3222ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3223 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3224
3225#endif // KMP_HAVE_QUAD
3226
3227// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3228
3229#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3230 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3231 TYPE rhs) { \
3232 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3233 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3234
3235#define CRITICAL_SWP(LCK_ID) \
3236 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3237 \
3238 old_value = (*lhs); \
3239 (*lhs) = rhs; \
3240 \
3241 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3242 return old_value;
3243
3244// ------------------------------------------------------------------------
3245#ifdef KMP_GOMP_COMPAT
3246#define GOMP_CRITICAL_SWP(FLAG) \
3247 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3248 KMP_CHECK_GTID; \
3249 CRITICAL_SWP(0); \
3250 }
3251#else
3252#define GOMP_CRITICAL_SWP(FLAG)
3253#endif /* KMP_GOMP_COMPAT */
3254
3255#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3256 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3257 TYPE old_value; \
3258 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3259 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3260 return old_value; \
3261 }
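// Illustrative sketch (not part of the runtime): outside GOMP-compat mode 2,
// the fixed4 instantiation below reduces the whole swap to a single hardware
// exchange, roughly:
#if 0
kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid, kmp_int32 *lhs,
                                   kmp_int32 rhs) {
  kmp_int32 old_value;
  old_value = KMP_XCHG_FIXED32(lhs, rhs); // xchg is implicitly locked on x86
  return old_value;
}
#endif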
3262// ------------------------------------------------------------------------
3263#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3264 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3265 TYPE old_value; \
3266 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3267 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3268 return old_value; \
3269 }
3270
3271// ------------------------------------------------------------------------
3272#define CMPXCHG_SWP(TYPE, BITS) \
3273 { \
3274 TYPE KMP_ATOMIC_VOLATILE temp_val; \
3275 TYPE old_value, new_value; \
3276 temp_val = *lhs; \
3277 old_value = temp_val; \
3278 new_value = rhs; \
3279 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3280 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3281 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3282 temp_val = *lhs; \
3283 old_value = temp_val; \
3284 new_value = rhs; \
3285 } \
3286 return old_value; \
3287 }
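// Illustrative sketch (not used by the runtime): in portable C++ the retry
// loop above corresponds roughly to this std::atomic idiom:
#if 0
#include <atomic>
template <class T> T swap_via_cas(std::atomic<T> *lhs, T rhs) {
  T old_value = lhs->load();
  // On failure, compare_exchange_weak reloads old_value with the current
  // contents of *lhs, so the loop simply retries with fresh data.
  while (!lhs->compare_exchange_weak(old_value, rhs)) {
  }
  return old_value;
}
#endif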
3288
3289// -------------------------------------------------------------------------
3290#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3291 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3292 TYPE old_value; \
3293 (void)old_value; \
3294 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3295 CMPXCHG_SWP(TYPE, BITS) \
3296 }
3297
3298ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3299ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3300ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3301
3302ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3303 KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3304
3305#if (KMP_ARCH_X86)
3306ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3307 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3308ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3309 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3310#else
3311ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3312ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3313 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3314#endif // (KMP_ARCH_X86)
3315
3316// ------------------------------------------------------------------------
3317// Routines for Extended types: long double, _Quad, complex flavours (use
3318// critical section)
3319#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3320 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3321 TYPE old_value; \
3322 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3323 CRITICAL_SWP(LCK_ID) \
3324 }
3325
3326// ------------------------------------------------------------------------
3327// !!! TODO: check if we need to return void for cmplx4 routines
3328// Workaround for cmplx4. Regular routines with a return value don't work
3329// on Win_32e, so the captured value is returned through an additional parameter.
3330
3331#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3332 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3333 TYPE rhs, TYPE *out) { \
3334 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3335 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3336
3337#define CRITICAL_SWP_WRK(LCK_ID) \
3338 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3339 \
3340 tmp = (*lhs); \
3341 (*lhs) = (rhs); \
3342 (*out) = tmp; \
3343 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3344 return;
3345// ------------------------------------------------------------------------
3346
3347#ifdef KMP_GOMP_COMPAT
3348#define GOMP_CRITICAL_SWP_WRK(FLAG) \
3349 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3350 KMP_CHECK_GTID; \
3351 CRITICAL_SWP_WRK(0); \
3352 }
3353#else
3354#define GOMP_CRITICAL_SWP_WRK(FLAG)
3355#endif /* KMP_GOMP_COMPAT */
3356// ------------------------------------------------------------------------
3357
3358#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3359 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3360 TYPE tmp; \
3361 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3362 CRITICAL_SWP_WRK(LCK_ID) \
3363 }
3364// End of the workaround for cmplx4
3365
3366ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3367#if KMP_HAVE_QUAD
3368ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3369#endif // KMP_HAVE_QUAD
3370// cmplx4 routine to return void
3371ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3372
3373// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3374// __kmpc_atomic_cmplx4_swp
3375
3376ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3377ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3378#if KMP_HAVE_QUAD
3379ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3380#if (KMP_ARCH_X86)
3381ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3382 1) // __kmpc_atomic_float16_a16_swp
3383ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3384 1) // __kmpc_atomic_cmplx16_a16_swp
3385#endif // (KMP_ARCH_X86)
3386#endif // KMP_HAVE_QUAD
3387
3388// End of OpenMP 4.0 Capture
3389
3390#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3391
3392#undef OP_CRITICAL
3393
3394/* ------------------------------------------------------------------------ */
3395/* Generic atomic routines */
3396
3397void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3398 void (*f)(void *, void *, void *)) {
3399 KMP_DEBUG_ASSERT(__kmp_init_serial);
3400
3401 if (
3402#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3403 FALSE /* must use lock */
3404#else
3405 TRUE
3406#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3407 ) {
3408 kmp_int8 old_value, new_value;
3409
3410 old_value = *(kmp_int8 *)lhs;
3411 (*f)(&new_value, &old_value, rhs);
3412
3413 /* TODO: Should this be acquire or release? */
3414 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3415 *(kmp_int8 *)&new_value)) {
3416 KMP_CPU_PAUSE();
3417
3418 old_value = *(kmp_int8 *)lhs;
3419 (*f)(&new_value, &old_value, rhs);
3420 }
3421
3422 return;
3423 } else {
3424 // All 1-byte data is of integer data type.
3425
3426#ifdef KMP_GOMP_COMPAT
3427 if (__kmp_atomic_mode == 2) {
3428 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3429 } else
3430#endif /* KMP_GOMP_COMPAT */
3431 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3432
3433 (*f)(lhs, lhs, rhs);
3434
3435#ifdef KMP_GOMP_COMPAT
3436 if (__kmp_atomic_mode == 2) {
3437 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3438 } else
3439#endif /* KMP_GOMP_COMPAT */
3440 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3441 }
3442}
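// Illustrative sketch (hypothetical call site, not part of the runtime): the
// callback receives (result, left, right) pointers, matching both the
// (*f)(&new_value, &old_value, rhs) and (*f)(lhs, lhs, rhs) calls above;
// or_op_1 and example_lowering are made-up names.
#if 0
static void or_op_1(void *out, void *a, void *b) {
  *(kmp_int8 *)out = (kmp_int8)(*(kmp_int8 *)a | *(kmp_int8 *)b);
}
static void example_lowering(ident_t *loc, int gtid, kmp_int8 *x,
                             kmp_int8 mask) {
  // Lowering of: #pragma omp atomic   *x |= mask;   (1-byte operand)
  __kmpc_atomic_1(loc, gtid, x, &mask, or_op_1);
}
#endif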
3443
3444void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3445 void (*f)(void *, void *, void *)) {
3446 if (
3447#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3448 FALSE /* must use lock */
3449#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3450 TRUE /* no alignment problems */
3451#else
3452 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3453#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3454 ) {
3455 kmp_int16 old_value, new_value;
3456
3457 old_value = *(kmp_int16 *)lhs;
3458 (*f)(&new_value, &old_value, rhs);
3459
3460 /* TODO: Should this be acquire or release? */
3461 while (!KMP_COMPARE_AND_STORE_ACQ16(
3462 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3463 KMP_CPU_PAUSE();
3464
3465 old_value = *(kmp_int16 *)lhs;
3466 (*f)(&new_value, &old_value, rhs);
3467 }
3468
3469 return;
3470 } else {
3471 // All 2-byte data is of integer data type.
3472
3473#ifdef KMP_GOMP_COMPAT
3474 if (__kmp_atomic_mode == 2) {
3475 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3476 } else
3477#endif /* KMP_GOMP_COMPAT */
3478 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3479
3480 (*f)(lhs, lhs, rhs);
3481
3482#ifdef KMP_GOMP_COMPAT
3483 if (__kmp_atomic_mode == 2) {
3484 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3485 } else
3486#endif /* KMP_GOMP_COMPAT */
3487 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3488 }
3489}
3490
3491void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3492 void (*f)(void *, void *, void *)) {
3493 KMP_DEBUG_ASSERT(__kmp_init_serial);
3494
3495 if (
3496// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3497// GOMP compatibility is broken if this routine is called for floats.
3498#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3499 TRUE /* no alignment problems */
3500#else
3501 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3502#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3503 ) {
3504 kmp_int32 old_value, new_value;
3505
3506 old_value = *(kmp_int32 *)lhs;
3507 (*f)(&new_value, &old_value, rhs);
3508
3509 /* TODO: Should this be acquire or release? */
3510 while (!KMP_COMPARE_AND_STORE_ACQ32(
3511 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3512 KMP_CPU_PAUSE();
3513
3514 old_value = *(kmp_int32 *)lhs;
3515 (*f)(&new_value, &old_value, rhs);
3516 }
3517
3518 return;
3519 } else {
3520 // Use __kmp_atomic_lock_4i for all 4-byte data,
3521 // even if it isn't of integer data type.
3522
3523#ifdef KMP_GOMP_COMPAT
3524 if (__kmp_atomic_mode == 2) {
3525 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3526 } else
3527#endif /* KMP_GOMP_COMPAT */
3528 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3529
3530 (*f)(lhs, lhs, rhs);
3531
3532#ifdef KMP_GOMP_COMPAT
3533 if (__kmp_atomic_mode == 2) {
3534 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3535 } else
3536#endif /* KMP_GOMP_COMPAT */
3537 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3538 }
3539}
3540
3541void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3542 void (*f)(void *, void *, void *)) {
3543 KMP_DEBUG_ASSERT(__kmp_init_serial);
3544 if (
3545
3546#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3547 FALSE /* must use lock */
3548#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3549 TRUE /* no alignment problems */
3550#else
3551 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3552#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3553 ) {
3554 kmp_int64 old_value, new_value;
3555
3556 old_value = *(kmp_int64 *)lhs;
3557 (*f)(&new_value, &old_value, rhs);
3558 /* TODO: Should this be acquire or release? */
3559 while (!KMP_COMPARE_AND_STORE_ACQ64(
3560 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3561 KMP_CPU_PAUSE();
3562
3563 old_value = *(kmp_int64 *)lhs;
3564 (*f)(&new_value, &old_value, rhs);
3565 }
3566
3567 return;
3568 } else {
3569 // Use __kmp_atomic_lock_8i for all 8-byte data,
3570 // even if it isn't of integer data type.
3571
3572#ifdef KMP_GOMP_COMPAT
3573 if (__kmp_atomic_mode == 2) {
3574 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3575 } else
3576#endif /* KMP_GOMP_COMPAT */
3577 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3578
3579 (*f)(lhs, lhs, rhs);
3580
3581#ifdef KMP_GOMP_COMPAT
3582 if (__kmp_atomic_mode == 2) {
3583 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3584 } else
3585#endif /* KMP_GOMP_COMPAT */
3586 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3587 }
3588}
3589
3590void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3591 void (*f)(void *, void *, void *)) {
3592 KMP_DEBUG_ASSERT(__kmp_init_serial);
3593
3594#ifdef KMP_GOMP_COMPAT
3595 if (__kmp_atomic_mode == 2) {
3596 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3597 } else
3598#endif /* KMP_GOMP_COMPAT */
3599 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3600
3601 (*f)(lhs, lhs, rhs);
3602
3603#ifdef KMP_GOMP_COMPAT
3604 if (__kmp_atomic_mode == 2) {
3605 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3606 } else
3607#endif /* KMP_GOMP_COMPAT */
3608 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3609}
3610
3611void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3612 void (*f)(void *, void *, void *)) {
3613 KMP_DEBUG_ASSERT(__kmp_init_serial);
3614
3615#ifdef KMP_GOMP_COMPAT
3616 if (__kmp_atomic_mode == 2) {
3617 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3618 } else
3619#endif /* KMP_GOMP_COMPAT */
3620 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3621
3622 (*f)(lhs, lhs, rhs);
3623
3624#ifdef KMP_GOMP_COMPAT
3625 if (__kmp_atomic_mode == 2) {
3626 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3627 } else
3628#endif /* KMP_GOMP_COMPAT */
3629 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3630}
3631
3632void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3633 void (*f)(void *, void *, void *)) {
3634 KMP_DEBUG_ASSERT(__kmp_init_serial);
3635
3636#ifdef KMP_GOMP_COMPAT
3637 if (__kmp_atomic_mode == 2) {
3638 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3639 } else
3640#endif /* KMP_GOMP_COMPAT */
3641 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3642
3643 (*f)(lhs, lhs, rhs);
3644
3645#ifdef KMP_GOMP_COMPAT
3646 if (__kmp_atomic_mode == 2) {
3647 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3648 } else
3649#endif /* KMP_GOMP_COMPAT */
3650 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3651}
3652
3653void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3654 void (*f)(void *, void *, void *)) {
3655 KMP_DEBUG_ASSERT(__kmp_init_serial);
3656
3657#ifdef KMP_GOMP_COMPAT
3658 if (__kmp_atomic_mode == 2) {
3659 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3660 } else
3661#endif /* KMP_GOMP_COMPAT */
3662 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3663
3664 (*f)(lhs, lhs, rhs);
3665
3666#ifdef KMP_GOMP_COMPAT
3667 if (__kmp_atomic_mode == 2) {
3668 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3669 } else
3670#endif /* KMP_GOMP_COMPAT */
3671 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3672}
3673
3674// AC: same two routines as GOMP_atomic_start/end, but they will be called by
3675// our compiler; duplicated to avoid using third-party names in pure Intel code
3676// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3677void __kmpc_atomic_start(void) {
3678 int gtid = __kmp_entry_gtid();
3679 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3680 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3681}
3682
3683void __kmpc_atomic_end(void) {
3684 int gtid = __kmp_get_gtid();
3685 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3686 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3687}
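// Illustrative sketch (hypothetical usage, not part of the runtime): these
// bracket an arbitrary update under the single global __kmp_atomic_lock, so
// every such region serializes against all others:
#if 0
static void example_update(long double *x, long double y) {
  __kmpc_atomic_start();
  *x = *x * 3.0L + y; // arbitrary user update, globally serialized
  __kmpc_atomic_end();
}
#endif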
3688
3693// end of file