diff --git a/arch/Kconfig b/arch/Kconfig
index 8f84fd268dee6a2c4bf3792352c48ff11e2e6222..b16e74e4b5afbbcfad4091c4067c638d2e50da3c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -647,4 +647,7 @@ config COMPAT_OLD_SIGACTION
 config ARCH_NO_COHERENT_DMA_MMAP
 	bool
 
+config CPU_NO_EFFICIENT_FFS
+	def_bool n
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index fe99f894e57d647f36c18177311eb1355b6332ff..7f312d80b43b4327611295a8506f1894754b4544 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -26,6 +26,7 @@ config ALPHA
 	select MODULES_USE_ELF_RELA
 	select ODD_RT_SIGACTION
 	select OLD_SIGSUSPEND
+	select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 8894f7e7e3de2a6e6cc1947d93a9359be961c699..0dcbacfdea4b98ca7805624a5ffb59b9298c4992 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -107,6 +107,7 @@ choice
 
 config ISA_ARCOMPACT
 	bool "ARCompact ISA"
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  The original ARC ISA of ARC600/700 cores
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 55347662e5edafa0900b54987b219a5fb83a6b5a..cb569b65a54ddcdaa8cce08d047799a24ba75f4b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -421,18 +421,21 @@ config CPU_32v3
 	select CPU_USE_DOMAINS if MMU
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
+	select CPU_NO_EFFICIENT_FFS
 
 config CPU_32v4
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
+	select CPU_NO_EFFICIENT_FFS
 
 config CPU_32v4T
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
+	select CPU_NO_EFFICIENT_FFS
 
 config CPU_32v5
 	bool
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 986ea84caaed48a9fd4d16a7b1195a7b97b87eb5..aa232de2d4bc854a7c25b73a538dc7bcf7df755e 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -20,6 +20,7 @@ config H8300
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_ARCH_KGDB
+	select CPU_NO_EFFICIENT_FFS
 
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index c82b2925399116e87128e0c187fde3d9e7aadb75..3cc8498fe0fe8c983031c73a72c3b2c168f92218 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -17,6 +17,7 @@ config M32R
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
 	select HAVE_DEBUG_STACKOVERFLOW
+	select CPU_NO_EFFICIENT_FFS
 
 config SBUS
 	bool
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index c1beb5ae181fa61648fab7f44040c940c8372aa1..8ace920ca24aaf1d491db0f7038a60e2bbc58237 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -40,6 +40,7 @@ config M68000
 	select CPU_HAS_NO_MULDIV64
 	select CPU_HAS_NO_UNALIGNED
 	select GENERIC_CSUM
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  The Freescale (was Motorola) 68000 CPU is the first generation of
 	  the well known M68K family of processors. The CPU core as well as
@@ -51,6 +52,7 @@ config MCPU32
 	bool
 	select CPU_HAS_NO_BITFIELDS
 	select CPU_HAS_NO_UNALIGNED
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  The Freescale (was then Motorola) CPU32 is a CPU core that is
 	  based on the 68020 processor. For the most part it is used in
@@ -130,6 +132,7 @@ config M5206
 	depends on !MMU
 	select COLDFIRE_SW_A7
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Motorola ColdFire 5206 processor support.
 
@@ -138,6 +141,7 @@ config M5206e
 	depends on !MMU
 	select COLDFIRE_SW_A7
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Motorola ColdFire 5206e processor support.
 
@@ -163,6 +167,7 @@ config M5249
 	depends on !MMU
 	select COLDFIRE_SW_A7
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Motorola ColdFire 5249 processor support.
 
@@ -171,6 +176,7 @@ config M525x
 	depends on !MMU
 	select COLDFIRE_SW_A7
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Freescale (Motorola) Coldfire 5251/5253 processor support.
 
@@ -189,6 +195,7 @@ config M5272
 	depends on !MMU
 	select COLDFIRE_SW_A7
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Motorola ColdFire 5272 processor support.
 
@@ -217,6 +224,7 @@ config M5307
 	select COLDFIRE_SW_A7
 	select HAVE_CACHE_CB
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Motorola ColdFire 5307 processor support.
 
@@ -242,6 +250,7 @@ config M5407
 	select COLDFIRE_SW_A7
 	select HAVE_CACHE_CB
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Motorola ColdFire 5407 processor support.
 
@@ -251,6 +260,7 @@ config M547x
 	select MMU_COLDFIRE if MMU
 	select HAVE_CACHE_CB
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support.
 
@@ -260,6 +270,7 @@ config M548x
 	select M54xx
 	select HAVE_CACHE_CB
 	select HAVE_MBAR
+	select CPU_NO_EFFICIENT_FFS
 	help
 	  Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support.
 
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index e47a08d72819651446cf937ce54333792666cf58..5b7a45d99cfbfd5397554b399a6ec2bd8e4b5582 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -30,6 +30,7 @@ config METAG
 	select OF
 	select OF_EARLY_FLATTREE
 	select SPARSE_IRQ
+	select CPU_NO_EFFICIENT_FFS
 
 config STACKTRACE_SUPPORT
 	def_bool y
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 3d793b55f60c04b3b18cdf01b20a1d7c6964e65a..f17c3a4fb697d9b40752c235a43fe2fd65349db9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -32,6 +32,7 @@ config MICROBLAZE
 	select OF_EARLY_FLATTREE
 	select TRACING_SUPPORT
 	select VIRT_TO_BUS
+	select CPU_NO_EFFICIENT_FFS
 
 config SWAP
 	def_bool n
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index e6f19fc61bf254d6fa05fdbec1164f76dca11c39..e961c8a7ea6626a1b044b366a99c5201e6d5beb3 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -204,6 +204,16 @@
 #endif
 #endif
 
+/* __builtin_constant_p(cpu_has_mips_r) && cpu_has_mips_r */
+#if !((defined(cpu_has_mips32r1) && cpu_has_mips32r1) || \
+	  (defined(cpu_has_mips32r2) && cpu_has_mips32r2) || \
+	  (defined(cpu_has_mips32r6) && cpu_has_mips32r6) || \
+	  (defined(cpu_has_mips64r1) && cpu_has_mips64r1) || \
+	  (defined(cpu_has_mips64r2) && cpu_has_mips64r2) || \
+	  (defined(cpu_has_mips64r6) && cpu_has_mips64r6))
+#define CPU_NO_EFFICIENT_FFS 1
+#endif
+
 #ifndef cpu_has_mips_1
 # define cpu_has_mips_1		(!cpu_has_mips_r6)
 #endif
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 87ca653eb5f3452558ea89c53d641f78dd370a62..51a56c8b04b451ce140a5708b4d5ca57bc7a6404 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -15,6 +15,7 @@ config NIOS2
 	select SOC_BUS
 	select SPARSE_IRQ
 	select USB_ARCH_HAS_HCD if USB_SUPPORT
+	select CPU_NO_EFFICIENT_FFS
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e118c02cc79a085eb59e10513bbf15c0272725a3..142cb057c41bd3218cb4a3634335452e161f172c 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -25,6 +25,7 @@ config OPENRISC
 	select MODULES_USE_ELF_RELA
 	select HAVE_DEBUG_STACKOVERFLOW
 	select OR1K_PIC
+	select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
 
 config MMU
 	def_bool y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 88cfaa8af78ea2af3192472920100a4ac1de8207..3d498a676551d0fa778f0bafe0cbced3fe44765d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -32,6 +32,7 @@ config PARISC
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select ARCH_NO_COHERENT_DMA_MMAP
+	select CPU_NO_EFFICIENT_FFS
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1c3c43d9d1b54f5d828c45f272e7dc0be882024f..a8c259059adf0e7b6af3587d59d2d370e0b0cd5d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,6 +123,7 @@ config S390
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
+	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index 366e1b599a7b7849b74f17f19308def9028cc971..507d6318138941aa5dec6cc767e992ca598e009f 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -14,6 +14,7 @@ config SCORE
 	select VIRT_TO_BUS
 	select MODULES_USE_ELF_REL
 	select CLONE_BACKWARDS
+	select CPU_NO_EFFICIENT_FFS
 
 choice
 	prompt "System type"
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f6254341c065da7ac4606b02a342c13b70b8e243..e803a836cb7ca81e9e1546649cc15e2e350dd88c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -20,6 +20,7 @@ config SUPERH
 	select PERF_USE_VMALLOC
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
+	select CPU_NO_EFFICIENT_FFS
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_XZ
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 1012f7ffcdf5f5486ffe67ec185c356509d6f6b6..546293d9e6c52906f2c1f72d1a9545749595f994 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
 	select ODD_RT_SIGACTION
 	select OLD_SIGSUSPEND
 	select ARCH_HAS_SG_CHAIN
+	select CPU_NO_EFFICIENT_FFS
 
 config SPARC32
 	def_bool !64BIT
diff --git a/lib/gcd.c b/lib/gcd.c
index 3657f129d7b8c5db0bee11a94a97d5d4bf251b52..135ee6407a5e4f0a6f7fc8da85f38a74ea37ed38 100644
--- a/lib/gcd.c
+++ b/lib/gcd.c
@@ -2,20 +2,77 @@
 #include <linux/gcd.h>
 #include <linux/export.h>
 
-/* Greatest common divisor */
+/*
+ * This implements the binary GCD algorithm. (Often attributed to Stein,
+ * but as Knuth has noted, appears in a first-century Chinese math text.)
+ *
+ * This is faster than the division-based algorithm even on x86, which
+ * has decent hardware division.
+ */
+
+#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS)
+
+/* If __ffs is available, the even/odd algorithm benchmarks slower. */
 unsigned long gcd(unsigned long a, unsigned long b)
 {
-	unsigned long r;
+	unsigned long r = a | b;
+
+	if (!a || !b)
+		return r;
 
-	if (a < b)
-		swap(a, b);
+	b >>= __ffs(b);
+	if (b == 1)
+		return r & -r;
 
-	if (!b)
-		return a;
-	while ((r = a % b) != 0) {
-		a = b;
-		b = r;
+	for (;;) {
+		a >>= __ffs(a);
+		if (a == 1)
+			return r & -r;
+		if (a == b)
+			return a << __ffs(r);
+
+		if (a < b)
+			swap(a, b);
+		a -= b;
 	}
-	return b;
 }
+
+#else
+
+/* If normalization is done by loops, the even/odd algorithm is a win. */
+unsigned long gcd(unsigned long a, unsigned long b)
+{
+	unsigned long r = a | b;
+
+	if (!a || !b)
+		return r;
+
+	/* Isolate lsbit of r */
+	r &= -r;
+
+	while (!(b & r))
+		b >>= 1;
+	if (b == r)
+		return r;
+
+	for (;;) {
+		while (!(a & r))
+			a >>= 1;
+		if (a == r)
+			return r;
+		if (a == b)
+			return a;
+
+		if (a < b)
+			swap(a, b);
+		a -= b;
+		a >>= 1;
+		if (a & r)
+			a += b;
+		a >>= 1;
+	}
+}
+
+#endif
+
 EXPORT_SYMBOL_GPL(gcd);