--- zzzz-none-000/linux-3.10.107/arch/s390/include/asm/percpu.h	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/arch/s390/include/asm/percpu.h	2021-02-04 17:41:59.000000000 +0000
@@ -15,57 +15,120 @@
  * per cpu area, use weak definitions to force the compiler to
  * generate external references.
  */
-#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) && defined(MODULE)
+#if defined(CONFIG_SMP) && defined(MODULE)
 #define ARCH_NEEDS_WEAK_PER_CPU
 #endif
 
-#define arch_this_cpu_to_op(pcp, val, op)				\
+/*
+ * We use a compare-and-swap loop since that uses less cpu cycles than
+ * disabling and enabling interrupts like the generic variant would do.
+ */
+#define arch_this_cpu_to_op_simple(pcp, val, op)			\
 ({									\
 	typedef typeof(pcp) pcp_op_T__;					\
 	pcp_op_T__ old__, new__, prev__;				\
 	pcp_op_T__ *ptr__;						\
 	preempt_disable();						\
-	ptr__ = __this_cpu_ptr(&(pcp));					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
 	prev__ = *ptr__;						\
 	do {								\
 		old__ = prev__;						\
 		new__ = old__ op (val);					\
-		switch (sizeof(*ptr__)) {				\
-		case 8:							\
-			prev__ = cmpxchg64(ptr__, old__, new__);	\
-			break;						\
-		default:						\
-			prev__ = cmpxchg(ptr__, old__, new__);		\
-		}							\
+		prev__ = cmpxchg(ptr__, old__, new__);			\
 	} while (prev__ != old__);					\
 	preempt_enable();						\
 	new__;								\
 })
 
-#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-
-#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op(pcp, val, +)
-
-#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &)
-
-#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |)
-
-#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
-#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^)
+#define this_cpu_add_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_1(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_2(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+
+#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define this_cpu_add_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast)			\
+{									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable();						\
+	ptr__ = raw_cpu_ptr(&(pcp)); 				\
+	if (__builtin_constant_p(val__) &&				\
+	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {		\
+		asm volatile(						\
+			op2 "   %[ptr__],%[val__]\n"			\
+			: [ptr__] "+Q" (*ptr__) 			\
+			: [val__] "i" ((szcast)val__)			\
+			: "cc");					\
+	} else {							\
+		asm volatile(						\
+			op1 "   %[old__],%[val__],%[ptr__]\n"		\
+			: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)	\
+			: [val__] "d" (val__)				\
+			: "cc");					\
+	}								\
+	preempt_enable();						\
+}
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
+
+#define arch_this_cpu_add_return(pcp, val, op)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable();						\
+	ptr__ = raw_cpu_ptr(&(pcp));	 				\
+	asm volatile(							\
+		op "    %[old__],%[val__],%[ptr__]\n"			\
+		: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)		\
+		: [val__] "d" (val__)					\
+		: "cc");						\
+	preempt_enable();						\
+	old__ + val__;							\
+})
+
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
+
+#define arch_this_cpu_to_op(pcp, val, op)				\
+{									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable();						\
+	ptr__ = raw_cpu_ptr(&(pcp));	 				\
+	asm volatile(							\
+		op "    %[old__],%[val__],%[ptr__]\n"			\
+		: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)		\
+		: [val__] "d" (val__)					\
+		: "cc");						\
+	preempt_enable();						\
+}
+
+#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op(pcp, val, "lan")
+#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op(pcp, val, "lang")
+#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op(pcp, val, "lao")
+#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op(pcp, val, "laog")
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
 
 #define arch_this_cpu_cmpxchg(pcp, oval, nval)				\
 ({									\
@@ -73,14 +136,8 @@
 	pcp_op_T__ ret__;						\
 	pcp_op_T__ *ptr__;						\
 	preempt_disable();						\
-	ptr__ = __this_cpu_ptr(&(pcp));					\
-	switch (sizeof(*ptr__)) {					\
-	case 8:								\
-		ret__ = cmpxchg64(ptr__, oval, nval);			\
-		break;							\
-	default:							\
-		ret__ = cmpxchg(ptr__, oval, nval);			\
-	}								\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = cmpxchg(ptr__, oval, nval);				\
 	preempt_enable();						\
 	ret__;								\
 })
@@ -95,7 +152,7 @@
 	typeof(pcp) *ptr__;						\
 	typeof(pcp) ret__;						\
 	preempt_disable();						\
-	ptr__ = __this_cpu_ptr(&(pcp));					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
 	ret__ = xchg(ptr__, nval);					\
 	preempt_enable();						\
 	ret__;								\
@@ -104,9 +161,7 @@
 #define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
 #define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
 #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
-#ifdef CONFIG_64BIT
 #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
-#endif
 
 #define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2)	\
 ({									\
@@ -116,17 +171,15 @@
 	typeof(pcp2) *p2__;						\
 	int ret__;							\
 	preempt_disable();						\
-	p1__ = __this_cpu_ptr(&(pcp1));					\
-	p2__ = __this_cpu_ptr(&(pcp2));					\
+	p1__ = raw_cpu_ptr(&(pcp1));					\
+	p2__ = raw_cpu_ptr(&(pcp2));					\
 	ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__);	\
 	preempt_enable();						\
 	ret__;								\
 })
 
 #define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
-#ifdef CONFIG_64BIT
 #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
-#endif
 
 #include <asm-generic/percpu.h>